1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses  -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
4
5target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
6target triple = "i386-unknown-linux-gnu"
7
8; When masked-interleaved-groups are disabled:
9; Check that the predicated load is not vectorized as an
10; interleaved-group but rather as a scalarized accesses.
11; (For SKX, Gather is not supported by the compiler for chars, therefore
12;  the only remaining alternative is to scalarize).
13; In this case a scalar epilogue is not needed.
14;
15; When  masked-interleave-group is enabled we expect to find the proper mask
16; shuffling code, feeding the wide masked load for an interleave-group (with
17; a single member).
18; Since the last (second) member of the load-group is a gap, peeling is used,
19; so we also expect to find a scalar epilogue loop.
20;
21; void masked_strided1(const unsigned char* restrict p,
22;                      unsigned char* restrict q,
23;                      unsigned char guard) {
24;   for(ix=0; ix < 1024; ++ix) {
25;     if (ix > guard) {
26;         char t = p[2*ix];
27;         q[ix] = t;
28;     }
29;   }
30; }
31
32define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
33; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1(
34; DISABLED_MASKED_STRIDED-NEXT:  entry:
35; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
36; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
37; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
38; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
39; DISABLED_MASKED_STRIDED:       vector.body:
40; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
41; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
42; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
43; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
44; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
45; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
46; DISABLED_MASKED_STRIDED:       pred.load.if:
47; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
48; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
49; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
50; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
51; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
52; DISABLED_MASKED_STRIDED:       pred.load.continue:
53; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
54; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
55; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
56; DISABLED_MASKED_STRIDED:       pred.load.if1:
57; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
58; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
59; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
60; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1
61; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
62; DISABLED_MASKED_STRIDED:       pred.load.continue2:
63; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
64; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
65; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
66; DISABLED_MASKED_STRIDED:       pred.load.if3:
67; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
68; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
69; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
70; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2
71; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
72; DISABLED_MASKED_STRIDED:       pred.load.continue4:
73; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
74; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
75; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
76; DISABLED_MASKED_STRIDED:       pred.load.if5:
77; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
78; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
79; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
80; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3
81; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
82; DISABLED_MASKED_STRIDED:       pred.load.continue6:
83; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
84; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
85; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
86; DISABLED_MASKED_STRIDED:       pred.load.if7:
87; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
88; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
89; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
90; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4
91; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
92; DISABLED_MASKED_STRIDED:       pred.load.continue8:
93; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ]
94; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
95; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
96; DISABLED_MASKED_STRIDED:       pred.load.if9:
97; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
98; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
99; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
100; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5
101; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
102; DISABLED_MASKED_STRIDED:       pred.load.continue10:
103; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
104; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
105; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
106; DISABLED_MASKED_STRIDED:       pred.load.if11:
107; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
108; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
109; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
110; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6
111; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
112; DISABLED_MASKED_STRIDED:       pred.load.continue12:
113; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ]
114; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
115; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
116; DISABLED_MASKED_STRIDED:       pred.load.if13:
117; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
118; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
119; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
120; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7
121; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
122; DISABLED_MASKED_STRIDED:       pred.load.continue14:
123; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
124; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
125; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
126; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
127; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
128; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
129; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
130; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
131; DISABLED_MASKED_STRIDED:       for.end:
132; DISABLED_MASKED_STRIDED-NEXT:    ret void
133;
134; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1(
135; ENABLED_MASKED_STRIDED-NEXT:  entry:
136; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
137; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
138; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
139; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
140; ENABLED_MASKED_STRIDED:       vector.body:
141; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
142; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
143; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
144; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
145; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
146; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
147; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
148; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
149; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
150; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
151; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>*
152; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP5]], i32 1, <8 x i1> [[TMP0]])
153; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
154; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
155; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016
156; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
157; ENABLED_MASKED_STRIDED:       for.body:
158; ENABLED_MASKED_STRIDED-NEXT:    [[IX_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1016, [[VECTOR_BODY]] ]
159; ENABLED_MASKED_STRIDED-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[IX_09]], [[CONV]]
160; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
161; ENABLED_MASKED_STRIDED:       if.then:
162; ENABLED_MASKED_STRIDED-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_09]], 1
163; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]]
164; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
165; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[IX_09]]
166; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP7]], i8* [[ARRAYIDX3]], align 1
167; ENABLED_MASKED_STRIDED-NEXT:    br label [[FOR_INC]]
168; ENABLED_MASKED_STRIDED:       for.inc:
169; ENABLED_MASKED_STRIDED-NEXT:    [[INC]] = add nuw nsw i32 [[IX_09]], 1
170; ENABLED_MASKED_STRIDED-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
171; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !2
172; ENABLED_MASKED_STRIDED:       for.end:
173; ENABLED_MASKED_STRIDED-NEXT:    ret void
174;
175entry:
176  %conv = zext i8 %guard to i32
177  br label %for.body
178
179for.body:
180  %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
181  %cmp1 = icmp ugt i32 %ix.09, %conv
182  br i1 %cmp1, label %if.then, label %for.inc
183
184if.then:
185  %mul = shl nuw nsw i32 %ix.09, 1
186  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
187  %0 = load i8, i8* %arrayidx, align 1
188  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
189  store i8 %0, i8* %arrayidx3, align 1
190  br label %for.inc
191
192for.inc:
193  %inc = add nuw nsw i32 %ix.09, 1
194  %exitcond = icmp eq i32 %inc, 1024
195  br i1 %exitcond, label %for.end, label %for.body
196
197for.end:
198  ret void
199}
200
201; Exactly the same scenario except we are now optimizing for size, therefore
202; we check that no scalar epilogue is created. Since we can't create an epilog
203; we need the ability to mask out the gaps.
204; When enable-masked-interleaved-access is enabled, the interleave-groups will
205; be vectorized with masked wide-loads with the mask properly shuffled and
206; And-ed with the gaps mask.
207;
208define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
209; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize(
210; DISABLED_MASKED_STRIDED-NEXT:  entry:
211; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
212; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
213; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
214; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
215; DISABLED_MASKED_STRIDED:       vector.body:
216; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
217; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
218; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
219; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
220; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
221; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
222; DISABLED_MASKED_STRIDED:       pred.load.if:
223; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
224; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
225; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
226; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
227; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
228; DISABLED_MASKED_STRIDED:       pred.load.continue:
229; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
230; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
231; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
232; DISABLED_MASKED_STRIDED:       pred.load.if1:
233; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
234; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
235; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
236; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1
237; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
238; DISABLED_MASKED_STRIDED:       pred.load.continue2:
239; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
240; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
241; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
242; DISABLED_MASKED_STRIDED:       pred.load.if3:
243; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
244; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
245; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
246; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2
247; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
248; DISABLED_MASKED_STRIDED:       pred.load.continue4:
249; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
250; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
251; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
252; DISABLED_MASKED_STRIDED:       pred.load.if5:
253; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
254; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
255; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
256; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3
257; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
258; DISABLED_MASKED_STRIDED:       pred.load.continue6:
259; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
260; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
261; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
262; DISABLED_MASKED_STRIDED:       pred.load.if7:
263; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
264; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
265; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
266; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4
267; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
268; DISABLED_MASKED_STRIDED:       pred.load.continue8:
269; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ]
270; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
271; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
272; DISABLED_MASKED_STRIDED:       pred.load.if9:
273; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
274; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
275; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
276; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5
277; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
278; DISABLED_MASKED_STRIDED:       pred.load.continue10:
279; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
280; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
281; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
282; DISABLED_MASKED_STRIDED:       pred.load.if11:
283; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
284; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
285; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
286; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6
287; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
288; DISABLED_MASKED_STRIDED:       pred.load.continue12:
289; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ]
290; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
291; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
292; DISABLED_MASKED_STRIDED:       pred.load.if13:
293; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
294; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
295; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
296; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7
297; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
298; DISABLED_MASKED_STRIDED:       pred.load.continue14:
299; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
300; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
301; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
302; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
303; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
304; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
305; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
306; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !2
307; DISABLED_MASKED_STRIDED:       for.end:
308; DISABLED_MASKED_STRIDED-NEXT:    ret void
309;
310; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize(
311; ENABLED_MASKED_STRIDED-NEXT:  entry:
312; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
313; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
314; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
315; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
316; ENABLED_MASKED_STRIDED:       vector.body:
317; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
318; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
319; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
320; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
321; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
322; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
323; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
324; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
325; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
326; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
327; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
328; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
329; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
330; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
331; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
332; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
333; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
334; ENABLED_MASKED_STRIDED:       for.end:
335; ENABLED_MASKED_STRIDED-NEXT:    ret void
336;
337entry:
338  %conv = zext i8 %guard to i32
339  br label %for.body
340
341for.body:
342  %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
343  %cmp1 = icmp ugt i32 %ix.09, %conv
344  br i1 %cmp1, label %if.then, label %for.inc
345
346if.then:
347  %mul = shl nuw nsw i32 %ix.09, 1
348  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
349  %0 = load i8, i8* %arrayidx, align 1
350  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09
351  store i8 %0, i8* %arrayidx3, align 1
352  br label %for.inc
353
354for.inc:
355  %inc = add nuw nsw i32 %ix.09, 1
356  %exitcond = icmp eq i32 %inc, 1024
357  br i1 %exitcond, label %for.end, label %for.body
358
359for.end:
360  ret void
361}
362
363
364; Accesses with gaps under Optsize scenario again, with unknown trip-count
365; this time, in order to check the behavior of folding-the-tail (folding the
366; remainder loop into the main loop using masking) together with interleaved-
367; groups.
368; When masked-interleave-group is disabled the interleave-groups will be
369; invalidated during Legality checks; So there we check for no epilogue
370; and for scalarized conditional accesses.
371; When masked-interleave-group is enabled we check that there is no epilogue,
372; and that the interleave-groups are vectorized using proper masking (with
373; shuffling of the mask feeding the wide masked load/store).
374; The mask itself is an And of two masks: one that masks away the remainder
375; iterations, and one that masks away the 'else' of the 'if' statement.
376; The shuffled mask is also And-ed with the gaps mask.
377;
378; void masked_strided1_optsize_unknown_tc(const unsigned char* restrict p,
379;                      unsigned char* restrict q,
380;                      unsigned char guard,
381;                      int n) {
382;   for(ix=0; ix < n; ++ix) {
383;     if (ix > guard) {
384;         char t = p[2*ix];
385;         q[ix] = t;
386;     }
387;   }
388; }
389;
390define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
391; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize_unknown_tc(
392; DISABLED_MASKED_STRIDED-NEXT:  entry:
393; DISABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
394; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
395; DISABLED_MASKED_STRIDED:       vector.ph:
396; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
397; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
398; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
399; DISABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
400; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
401; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
402; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
403; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
404; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
405; DISABLED_MASKED_STRIDED:       vector.body:
406; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
407; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ]
408; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
409; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
410; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
411; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
412; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
413; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
414; DISABLED_MASKED_STRIDED:       pred.load.if:
415; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
416; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
417; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
418; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
419; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
420; DISABLED_MASKED_STRIDED:       pred.load.continue:
421; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
422; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
423; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
424; DISABLED_MASKED_STRIDED:       pred.load.if3:
425; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1
426; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
427; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
428; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1
429; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
430; DISABLED_MASKED_STRIDED:       pred.load.continue4:
431; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
432; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
433; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
434; DISABLED_MASKED_STRIDED:       pred.load.if5:
435; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2
436; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
437; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
438; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2
439; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
440; DISABLED_MASKED_STRIDED:       pred.load.continue6:
441; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ]
442; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
443; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
444; DISABLED_MASKED_STRIDED:       pred.load.if7:
445; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3
446; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
447; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
448; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3
449; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
450; DISABLED_MASKED_STRIDED:       pred.load.continue8:
451; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ]
452; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
453; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
454; DISABLED_MASKED_STRIDED:       pred.load.if9:
455; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4
456; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
457; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
458; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4
459; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
460; DISABLED_MASKED_STRIDED:       pred.load.continue10:
461; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
462; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
463; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
464; DISABLED_MASKED_STRIDED:       pred.load.if11:
465; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5
466; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
467; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
468; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5
469; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
470; DISABLED_MASKED_STRIDED:       pred.load.continue12:
471; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ]
472; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
473; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
474; DISABLED_MASKED_STRIDED:       pred.load.if13:
475; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6
476; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
477; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
478; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6
479; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
480; DISABLED_MASKED_STRIDED:       pred.load.continue14:
481; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ]
482; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
483; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]]
484; DISABLED_MASKED_STRIDED:       pred.load.if15:
485; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7
486; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
487; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
488; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7
489; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
490; DISABLED_MASKED_STRIDED:       pred.load.continue16:
491; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
492; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
493; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>*
494; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]])
495; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
496; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
497; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
498; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !3
499; DISABLED_MASKED_STRIDED:       for.end:
500; DISABLED_MASKED_STRIDED-NEXT:    ret void
501;
502; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize_unknown_tc(
503; ENABLED_MASKED_STRIDED-NEXT:  entry:
504; ENABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
505; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
506; ENABLED_MASKED_STRIDED:       vector.ph:
507; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
508; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
509; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
510; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
511; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
512; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
513; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
514; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
515; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
516; ENABLED_MASKED_STRIDED:       vector.body:
517; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
518; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
519; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
520; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
521; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
522; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
523; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
524; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
525; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
526; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
527; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> undef)
528; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
529; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
530; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
531; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
532; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
533; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
534; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
535; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !5
536; ENABLED_MASKED_STRIDED:       for.end:
537; ENABLED_MASKED_STRIDED-NEXT:    ret void
538;
539entry:
540  %cmp9 = icmp sgt i32 %n, 0
541  br i1 %cmp9, label %for.body.lr.ph, label %for.end
542
543for.body.lr.ph:
544  %conv = zext i8 %guard to i32
545  br label %for.body
546
547for.body:
548  %ix.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
549  %cmp1 = icmp ugt i32 %ix.010, %conv
550  br i1 %cmp1, label %if.then, label %for.inc
551
552if.then:
553  %mul = shl nuw nsw i32 %ix.010, 1
554  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
555  %0 = load i8, i8* %arrayidx, align 1
556  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010
557  store i8 %0, i8* %arrayidx3, align 1
558  br label %for.inc
559
560for.inc:
561  %inc = add nuw nsw i32 %ix.010, 1
562  %exitcond = icmp eq i32 %inc, %n
563  br i1 %exitcond, label %for.end.loopexit, label %for.body
564
565for.end.loopexit:
566  br label %for.end
567
568for.end:
569  ret void
570}
571
572; Same, with stride 3. This is to check the gaps-mask and the shuffled mask
573; with a different stride.
574; So accesses are with gaps under Optsize scenario again, with unknown trip-
575; count, in order to check the behavior of folding-the-tail (folding the
576; remainder loop into the main loop using masking) together with interleaved-
577; groups.
578; When masked-interleave-group is enabled we check that there is no epilogue,
579; and that the interleave-groups are vectorized using proper masking (with
580; shuffling of the mask feeding the wide masked load/store).
581; The mask itself is an And of two masks: one that masks away the remainder
582; iterations, and one that masks away the 'else' of the 'if' statement.
583; The shuffled mask is also And-ed with the gaps mask.
584;
585; void masked_strided3_optsize_unknown_tc(const unsigned char* restrict p,
586;                      unsigned char* restrict q,
587;                      unsigned char guard,
588;                      int n) {
589;   for(ix=0; ix < n; ++ix) {
590;     if (ix > guard) {
591;         char t = p[3*ix];
592;         q[ix] = t;
593;     }
594;   }
595; }
596;
597define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize {
598; DISABLED_MASKED_STRIDED-LABEL: @masked_strided3_optsize_unknown_tc(
599; DISABLED_MASKED_STRIDED-NEXT:  entry:
600; DISABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
601; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
602; DISABLED_MASKED_STRIDED:       vector.ph:
603; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
604; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
605; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
606; DISABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
607; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
608; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
609; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
610; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
611; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
612; DISABLED_MASKED_STRIDED:       vector.body:
613; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ]
614; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ]
615; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
616; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
617; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
618; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
619; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
620; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
621; DISABLED_MASKED_STRIDED:       pred.load.if:
622; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
623; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
624; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
625; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
626; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
627; DISABLED_MASKED_STRIDED:       pred.load.continue:
628; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
629; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
630; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
631; DISABLED_MASKED_STRIDED:       pred.load.if3:
632; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1
633; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
634; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
635; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1
636; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
637; DISABLED_MASKED_STRIDED:       pred.load.continue4:
638; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
639; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
640; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
641; DISABLED_MASKED_STRIDED:       pred.load.if5:
642; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2
643; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
644; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
645; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2
646; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
647; DISABLED_MASKED_STRIDED:       pred.load.continue6:
648; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ]
649; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
650; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
651; DISABLED_MASKED_STRIDED:       pred.load.if7:
652; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3
653; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
654; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
655; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3
656; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
657; DISABLED_MASKED_STRIDED:       pred.load.continue8:
658; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ]
659; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
660; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
661; DISABLED_MASKED_STRIDED:       pred.load.if9:
662; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4
663; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
664; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
665; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4
666; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
667; DISABLED_MASKED_STRIDED:       pred.load.continue10:
668; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
669; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
670; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
671; DISABLED_MASKED_STRIDED:       pred.load.if11:
672; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5
673; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
674; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
675; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5
676; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
677; DISABLED_MASKED_STRIDED:       pred.load.continue12:
678; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ]
679; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
680; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
681; DISABLED_MASKED_STRIDED:       pred.load.if13:
682; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6
683; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
684; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
685; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6
686; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
687; DISABLED_MASKED_STRIDED:       pred.load.continue14:
688; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ]
689; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
690; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]]
691; DISABLED_MASKED_STRIDED:       pred.load.if15:
692; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7
693; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
694; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
695; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7
696; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
697; DISABLED_MASKED_STRIDED:       pred.load.continue16:
698; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
699; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
700; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>*
701; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]])
702; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
703; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
704; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
705; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !4
706; DISABLED_MASKED_STRIDED:       for.end:
707; DISABLED_MASKED_STRIDED-NEXT:    ret void
708;
709; ENABLED_MASKED_STRIDED-LABEL: @masked_strided3_optsize_unknown_tc(
710; ENABLED_MASKED_STRIDED-NEXT:  entry:
711; ENABLED_MASKED_STRIDED-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
712; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
713; ENABLED_MASKED_STRIDED:       vector.ph:
714; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
715; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
716; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
717; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
718; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
719; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
720; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
721; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
722; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
723; ENABLED_MASKED_STRIDED:       vector.body:
724; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
725; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
726; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
727; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
728; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw i32 [[INDEX]], 3
729; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
730; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
731; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
732; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
733; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
734; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> undef)
735; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
736; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
737; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>*
738; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]])
739; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
740; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
741; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
742; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6
743; ENABLED_MASKED_STRIDED:       for.end:
744; ENABLED_MASKED_STRIDED-NEXT:    ret void
745;
746entry:
747  %cmp9 = icmp sgt i32 %n, 0
748  br i1 %cmp9, label %for.body.lr.ph, label %for.end
749
750for.body.lr.ph:
751  %conv = zext i8 %guard to i32
752  br label %for.body
753
754for.body:
755  %ix.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
756  %cmp1 = icmp ugt i32 %ix.010, %conv
757  br i1 %cmp1, label %if.then, label %for.inc
758
759if.then:
760  %mul = mul nsw i32 %ix.010, 3
761  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
762  %0 = load i8, i8* %arrayidx, align 1
763  %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010
764  store i8 %0, i8* %arrayidx3, align 1
765  br label %for.inc
766
767for.inc:
768  %inc = add nuw nsw i32 %ix.010, 1
769  %exitcond = icmp eq i32 %inc, %n
770  br i1 %exitcond, label %for.end.loopexit, label %for.body
771
772for.end.loopexit:
773  br label %for.end
774
775for.end:
776  ret void
777}
778
779
780; Back to stride 2 with gaps with a known trip count under opt for size,
781; but this time the load/store are not predicated.
782; When enable-masked-interleaved-access is disabled, the interleave-groups will
783; be invalidated during cost-model checks because we have gaps and we can't
784; create an epilog. The access is thus scalarized.
785; (Before the fix that this test checks, we used to create an epilogue despite
786; optsize, and vectorized the access as an interleaved-group. This is now fixed,
787; and we make sure that a scalar epilogue does not exist).
788; When enable-masked-interleaved-access is enabled, the interleave-groups will
789; be vectorized with masked wide-loads (masking away the gaps).
790;
791; void unconditional_strided1_optsize(const unsigned char* restrict p,
792;                                unsigned char* restrict q,
793;                                unsigned char guard) {
794;   for(ix=0; ix < 1024; ++ix) {
795;         char t = p[2*ix];
796;         q[ix] = t;
797;   }
798; }
799;
800define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize {
801; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize(
802; DISABLED_MASKED_STRIDED-NEXT:  entry:
803; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
804; DISABLED_MASKED_STRIDED:       vector.body:
805; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
806; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
807; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
808; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i32 0
809; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
810; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i32 1
811; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP3]]
812; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP0]], i32 2
813; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP5]]
814; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP0]], i32 3
815; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP7]]
816; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i32 4
817; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
818; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP0]], i32 5
819; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
820; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP0]], i32 6
821; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP13]]
822; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i32 7
823; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
824; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP2]], align 1
825; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = load i8, i8* [[TMP4]], align 1
826; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP6]], align 1
827; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = load i8, i8* [[TMP8]], align 1
828; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = load i8, i8* [[TMP10]], align 1
829; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = load i8, i8* [[TMP12]], align 1
830; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP14]], align 1
831; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = load i8, i8* [[TMP16]], align 1
832; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = insertelement <8 x i8> undef, i8 [[TMP17]], i32 0
833; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP18]], i32 1
834; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = insertelement <8 x i8> [[TMP26]], i8 [[TMP19]], i32 2
835; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP20]], i32 3
836; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = insertelement <8 x i8> [[TMP28]], i8 [[TMP21]], i32 4
837; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP29]], i8 [[TMP22]], i32 5
838; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = insertelement <8 x i8> [[TMP30]], i8 [[TMP23]], i32 6
839; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP24]], i32 7
840; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
841; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = bitcast i8* [[TMP33]] to <8 x i8>*
842; DISABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[TMP32]], <8 x i8>* [[TMP34]], align 1
843; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
844; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
845; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
846; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
847; DISABLED_MASKED_STRIDED:       for.end:
848; DISABLED_MASKED_STRIDED-NEXT:    ret void
849;
850; ENABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize(
851; ENABLED_MASKED_STRIDED-NEXT:  entry:
852; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
853; ENABLED_MASKED_STRIDED:       vector.body:
854; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
855; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
856; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
857; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>*
858; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> undef)
859; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
860; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
861; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>*
862; ENABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1
863; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
864; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
865; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7
866; ENABLED_MASKED_STRIDED:       for.end:
867; ENABLED_MASKED_STRIDED-NEXT:    ret void
868;
869entry:
870  br label %for.body
871
872for.body:
873  %ix.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
874  %mul = shl nuw nsw i32 %ix.06, 1
875  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
876  %0 = load i8, i8* %arrayidx, align 1
877  %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.06
878  store i8 %0, i8* %arrayidx1, align 1
879  %inc = add nuw nsw i32 %ix.06, 1
880  %exitcond = icmp eq i32 %inc, 1024
881  br i1 %exitcond, label %for.end, label %for.body
882
883for.end:
884  ret void
885}
886
887
888
889; Unconditioal accesses with gaps under Optsize scenario again, with unknown
890; trip-count this time, in order to check the behavior of folding-the-tail
891; (folding the remainder loop into the main loop using masking) together with
892; interleaved-groups. Folding-the-tail turns the accesses to conditional which
893; requires proper masking. In addition we need to mask out the gaps (all
894; because we are not allowed to use an epilog due to optsize).
895; When enable-masked-interleaved-access is disabled, the interleave-groups will
896; be invalidated during cost-model checks. So there we check for no epilogue
897; and for scalarized conditional accesses.
898; When masked-interleave-group is enabled we check that there is no epilogue,
899; and that the interleave-groups are vectorized using proper masking (with
900; shuffling of the mask feeding the wide masked load/store).
901; The shuffled mask is also And-ed with the gaps mask.
902;
903;   for(ix=0; ix < n; ++ix) {
904;         char t = p[2*ix];
905;         q[ix] = t;
906;   }
907;
908define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
909; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize_unknown_tc(
910; DISABLED_MASKED_STRIDED-NEXT:  entry:
911; DISABLED_MASKED_STRIDED-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
912; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
913; DISABLED_MASKED_STRIDED:       vector.ph:
914; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
915; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
916; DISABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
917; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
918; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
919; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
920; DISABLED_MASKED_STRIDED:       vector.body:
921; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
922; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
923; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
924; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
925; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
926; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
927; DISABLED_MASKED_STRIDED:       pred.load.if:
928; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
929; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
930; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
931; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
932; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
933; DISABLED_MASKED_STRIDED:       pred.load.continue:
934; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
935; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
936; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
937; DISABLED_MASKED_STRIDED:       pred.load.if1:
938; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
939; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
940; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
941; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1
942; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
943; DISABLED_MASKED_STRIDED:       pred.load.continue2:
944; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
945; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
946; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
947; DISABLED_MASKED_STRIDED:       pred.load.if3:
948; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
949; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
950; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
951; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2
952; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
953; DISABLED_MASKED_STRIDED:       pred.load.continue4:
954; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
955; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
956; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
957; DISABLED_MASKED_STRIDED:       pred.load.if5:
958; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
959; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
960; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
961; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3
962; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
963; DISABLED_MASKED_STRIDED:       pred.load.continue6:
964; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
965; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
966; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
967; DISABLED_MASKED_STRIDED:       pred.load.if7:
968; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
969; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
970; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
971; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4
972; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
973; DISABLED_MASKED_STRIDED:       pred.load.continue8:
974; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ]
975; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
976; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
977; DISABLED_MASKED_STRIDED:       pred.load.if9:
978; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
979; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
980; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
981; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5
982; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
983; DISABLED_MASKED_STRIDED:       pred.load.continue10:
984; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
985; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
986; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
987; DISABLED_MASKED_STRIDED:       pred.load.if11:
988; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
989; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
990; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
991; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6
992; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
993; DISABLED_MASKED_STRIDED:       pred.load.continue12:
994; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ]
995; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
996; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
997; DISABLED_MASKED_STRIDED:       pred.load.if13:
998; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
999; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
1000; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
1001; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7
1002; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1003; DISABLED_MASKED_STRIDED:       pred.load.continue14:
1004; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
1005; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
1006; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>*
1007; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]])
1008; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1009; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1010; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1011; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6
1012; DISABLED_MASKED_STRIDED:       for.end:
1013; DISABLED_MASKED_STRIDED-NEXT:    ret void
1014;
1015; ENABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize_unknown_tc(
1016; ENABLED_MASKED_STRIDED-NEXT:  entry:
1017; ENABLED_MASKED_STRIDED-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0
1018; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
1019; ENABLED_MASKED_STRIDED:       vector.ph:
1020; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
1021; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
1022; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
1023; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
1024; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1025; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1026; ENABLED_MASKED_STRIDED:       vector.body:
1027; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1028; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
1029; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
1030; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1031; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]]
1032; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
1033; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
1034; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
1035; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
1036; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
1037; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef)
1038; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1039; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]]
1040; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>*
1041; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]])
1042; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1043; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1044; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8
1045; ENABLED_MASKED_STRIDED:       for.end:
1046; ENABLED_MASKED_STRIDED-NEXT:    ret void
1047;
1048entry:
1049  %cmp6 = icmp sgt i32 %n, 0
1050  br i1 %cmp6, label %for.body.preheader, label %for.end
1051
1052for.body.preheader:
1053  br label %for.body
1054
1055for.body:
1056  %ix.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
1057  %mul = shl nuw nsw i32 %ix.07, 1
1058  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
1059  %0 = load i8, i8* %arrayidx, align 1
1060  %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.07
1061  store i8 %0, i8* %arrayidx1, align 1
1062  %inc = add nuw nsw i32 %ix.07, 1
1063  %exitcond = icmp eq i32 %inc, %n
1064  br i1 %exitcond, label %for.end.loopexit, label %for.body
1065
1066for.end.loopexit:
1067  br label %for.end
1068
1069for.end:
1070  ret void
1071}
1072
1073
1074; Check also a scenario with full interleave-groups (no gaps) as well as both
1075; load and store groups. We check that when masked-interleave-group is disabled
1076; the predicated loads (and stores) are not vectorized as an
1077; interleaved-group but rather as four separate scalarized accesses.
1078; (For SKX, gather/scatter is not supported by the compiler for chars, therefore
1079; the only remaining alternative is to scalarize).
1080; When  masked-interleave-group is enabled we expect to find the proper mask
1081; shuffling code, feeding the wide masked load/store for the two interleave-
1082; groups.
1083;
1084; void masked_strided2(const unsigned char* restrict p,
1085;                     unsigned char* restrict q,
1086;                     unsigned char guard) {
1087; for(ix=0; ix < 1024; ++ix) {
1088;     if (ix > guard) {
1089;         char left = p[2*ix];
1090;         char right = p[2*ix + 1];
1091;         char max = max(left, right);
1092;         q[2*ix] = max;
1093;         q[2*ix+1] = 0 - max;
1094;     }
1095; }
1096;}
1097;
1098define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr  {
1099; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2(
1100; DISABLED_MASKED_STRIDED-NEXT:  entry:
1101; DISABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
1102; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
1103; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1104; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1105; DISABLED_MASKED_STRIDED:       vector.body:
1106; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
1107; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
1108; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1109; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1110; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
1111; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1112; DISABLED_MASKED_STRIDED:       pred.load.if:
1113; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
1114; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
1115; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
1116; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
1117; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1118; DISABLED_MASKED_STRIDED:       pred.load.continue:
1119; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
1120; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
1121; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1122; DISABLED_MASKED_STRIDED:       pred.load.if1:
1123; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
1124; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
1125; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
1126; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1
1127; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1128; DISABLED_MASKED_STRIDED:       pred.load.continue2:
1129; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
1130; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
1131; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1132; DISABLED_MASKED_STRIDED:       pred.load.if3:
1133; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
1134; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
1135; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
1136; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2
1137; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1138; DISABLED_MASKED_STRIDED:       pred.load.continue4:
1139; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
1140; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
1141; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1142; DISABLED_MASKED_STRIDED:       pred.load.if5:
1143; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
1144; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
1145; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
1146; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3
1147; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1148; DISABLED_MASKED_STRIDED:       pred.load.continue6:
1149; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
1150; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
1151; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1152; DISABLED_MASKED_STRIDED:       pred.load.if7:
1153; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
1154; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
1155; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
1156; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4
1157; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1158; DISABLED_MASKED_STRIDED:       pred.load.continue8:
1159; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ]
1160; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
1161; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1162; DISABLED_MASKED_STRIDED:       pred.load.if9:
1163; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
1164; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
1165; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
1166; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5
1167; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1168; DISABLED_MASKED_STRIDED:       pred.load.continue10:
1169; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
1170; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
1171; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1172; DISABLED_MASKED_STRIDED:       pred.load.if11:
1173; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
1174; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
1175; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
1176; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6
1177; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1178; DISABLED_MASKED_STRIDED:       pred.load.continue12:
1179; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ]
1180; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
1181; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
1182; DISABLED_MASKED_STRIDED:       pred.load.if13:
1183; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
1184; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
1185; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
1186; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7
1187; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1188; DISABLED_MASKED_STRIDED:       pred.load.continue14:
1189; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
1190; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1191; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
1192; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
1193; DISABLED_MASKED_STRIDED:       pred.load.if15:
1194; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
1195; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
1196; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
1197; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0
1198; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
1199; DISABLED_MASKED_STRIDED:       pred.load.continue16:
1200; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
1201; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
1202; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
1203; DISABLED_MASKED_STRIDED:       pred.load.if17:
1204; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1
1205; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]]
1206; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1
1207; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i32 1
1208; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
1209; DISABLED_MASKED_STRIDED:       pred.load.continue18:
1210; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ]
1211; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
1212; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
1213; DISABLED_MASKED_STRIDED:       pred.load.if19:
1214; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2
1215; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]]
1216; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1
1217; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i32 2
1218; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
1219; DISABLED_MASKED_STRIDED:       pred.load.continue20:
1220; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ]
1221; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
1222; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
1223; DISABLED_MASKED_STRIDED:       pred.load.if21:
1224; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3
1225; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]]
1226; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1
1227; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i32 3
1228; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
1229; DISABLED_MASKED_STRIDED:       pred.load.continue22:
1230; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ]
1231; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
1232; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
1233; DISABLED_MASKED_STRIDED:       pred.load.if23:
1234; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4
1235; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]]
1236; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1
1237; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i32 4
1238; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
1239; DISABLED_MASKED_STRIDED:       pred.load.continue24:
1240; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ]
1241; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
1242; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
1243; DISABLED_MASKED_STRIDED:       pred.load.if25:
1244; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5
1245; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]]
1246; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1
1247; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i32 5
1248; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
1249; DISABLED_MASKED_STRIDED:       pred.load.continue26:
1250; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
1251; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
1252; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
1253; DISABLED_MASKED_STRIDED:       pred.load.if27:
1254; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6
1255; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]]
1256; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1
1257; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i32 6
1258; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
1259; DISABLED_MASKED_STRIDED:       pred.load.continue28:
1260; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ]
1261; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
1262; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
1263; DISABLED_MASKED_STRIDED:       pred.load.if29:
1264; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7
1265; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]]
1266; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1
1267; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i32 7
1268; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
1269; DISABLED_MASKED_STRIDED:       pred.load.continue30:
1270; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ]
1271; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]]
1272; DISABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]]
1273; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
1274; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1275; DISABLED_MASKED_STRIDED:       pred.store.if:
1276; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
1277; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]]
1278; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i32 0
1279; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP104]], i8* [[TMP103]], align 1
1280; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
1281; DISABLED_MASKED_STRIDED:       pred.store.continue:
1282; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
1283; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
1284; DISABLED_MASKED_STRIDED:       pred.store.if31:
1285; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
1286; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]]
1287; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i32 1
1288; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], i8* [[TMP107]], align 1
1289; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
1290; DISABLED_MASKED_STRIDED:       pred.store.continue32:
1291; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
1292; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
1293; DISABLED_MASKED_STRIDED:       pred.store.if33:
1294; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
1295; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]]
1296; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i32 2
1297; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], i8* [[TMP111]], align 1
1298; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
1299; DISABLED_MASKED_STRIDED:       pred.store.continue34:
1300; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
1301; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
1302; DISABLED_MASKED_STRIDED:       pred.store.if35:
1303; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
1304; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]]
1305; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i32 3
1306; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], i8* [[TMP115]], align 1
1307; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
1308; DISABLED_MASKED_STRIDED:       pred.store.continue36:
1309; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
1310; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
1311; DISABLED_MASKED_STRIDED:       pred.store.if37:
1312; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
1313; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]]
1314; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i32 4
1315; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], i8* [[TMP119]], align 1
1316; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
1317; DISABLED_MASKED_STRIDED:       pred.store.continue38:
1318; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
1319; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
1320; DISABLED_MASKED_STRIDED:       pred.store.if39:
1321; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
1322; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]]
1323; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i32 5
1324; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], i8* [[TMP123]], align 1
1325; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
1326; DISABLED_MASKED_STRIDED:       pred.store.continue40:
1327; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
1328; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
1329; DISABLED_MASKED_STRIDED:       pred.store.if41:
1330; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
1331; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]]
1332; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i32 6
1333; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], i8* [[TMP127]], align 1
1334; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
1335; DISABLED_MASKED_STRIDED:       pred.store.continue42:
1336; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
1337; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
1338; DISABLED_MASKED_STRIDED:       pred.store.if43:
1339; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
1340; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]]
1341; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i32 7
1342; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], i8* [[TMP131]], align 1
1343; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
1344; DISABLED_MASKED_STRIDED:       pred.store.continue44:
1345; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]]
1346; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
1347; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
1348; DISABLED_MASKED_STRIDED:       pred.store.if45:
1349; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
1350; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]]
1351; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i32 0
1352; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP137]], i8* [[TMP136]], align 1
1353; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
1354; DISABLED_MASKED_STRIDED:       pred.store.continue46:
1355; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
1356; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
1357; DISABLED_MASKED_STRIDED:       pred.store.if47:
1358; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1
1359; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]]
1360; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i32 1
1361; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], i8* [[TMP140]], align 1
1362; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
1363; DISABLED_MASKED_STRIDED:       pred.store.continue48:
1364; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
1365; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
1366; DISABLED_MASKED_STRIDED:       pred.store.if49:
1367; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2
1368; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]]
1369; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i32 2
1370; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], i8* [[TMP144]], align 1
1371; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
1372; DISABLED_MASKED_STRIDED:       pred.store.continue50:
1373; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
1374; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
1375; DISABLED_MASKED_STRIDED:       pred.store.if51:
1376; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3
1377; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]]
1378; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i32 3
1379; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], i8* [[TMP148]], align 1
1380; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
1381; DISABLED_MASKED_STRIDED:       pred.store.continue52:
1382; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
1383; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
1384; DISABLED_MASKED_STRIDED:       pred.store.if53:
1385; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4
1386; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]]
1387; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i32 4
1388; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], i8* [[TMP152]], align 1
1389; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
1390; DISABLED_MASKED_STRIDED:       pred.store.continue54:
1391; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
1392; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
1393; DISABLED_MASKED_STRIDED:       pred.store.if55:
1394; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5
1395; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]]
1396; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i32 5
1397; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], i8* [[TMP156]], align 1
1398; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
1399; DISABLED_MASKED_STRIDED:       pred.store.continue56:
1400; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
1401; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
1402; DISABLED_MASKED_STRIDED:       pred.store.if57:
1403; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6
1404; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]]
1405; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i32 6
1406; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], i8* [[TMP160]], align 1
1407; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
1408; DISABLED_MASKED_STRIDED:       pred.store.continue58:
1409; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
1410; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
1411; DISABLED_MASKED_STRIDED:       pred.store.if59:
1412; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7
1413; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]]
1414; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i32 7
1415; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], i8* [[TMP164]], align 1
1416; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
1417; DISABLED_MASKED_STRIDED:       pred.store.continue60:
1418; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1419; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1420; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
1421; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7
1422; DISABLED_MASKED_STRIDED:       for.end:
1423; DISABLED_MASKED_STRIDED-NEXT:    ret void
1424;
1425; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2(
1426; ENABLED_MASKED_STRIDED-NEXT:  entry:
1427; ENABLED_MASKED_STRIDED-NEXT:    [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32
1428; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
1429; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1430; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1431; ENABLED_MASKED_STRIDED:       vector.body:
1432; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1433; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1434; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1435; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
1436; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
1437; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
1438; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
1439; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
1440; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1441; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1442; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], 1
1443; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]]
1444; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]]
1445; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]]
1446; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1
1447; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]]
1448; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
1449; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1450; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
1451; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1452; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1453; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
1454; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !9
1455; ENABLED_MASKED_STRIDED:       for.end:
1456; ENABLED_MASKED_STRIDED-NEXT:    ret void
1457;
1458entry:
1459  %conv = zext i8 %guard to i32
1460  br label %for.body
1461
1462for.body:
1463  %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
1464  %cmp1 = icmp ugt i32 %ix.024, %conv
1465  br i1 %cmp1, label %if.then, label %for.inc
1466
1467if.then:
1468  %mul = shl nuw nsw i32 %ix.024, 1
1469  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
1470  %0 = load i8, i8* %arrayidx, align 1
1471  %add = or i32 %mul, 1
1472  %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add
1473  %1 = load i8, i8* %arrayidx4, align 1
1474  %cmp.i = icmp slt i8 %0, %1
1475  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
1476  %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul
1477  store i8 %spec.select.i, i8* %arrayidx6, align 1
1478  %sub = sub i8 0, %spec.select.i
1479  %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add
1480  store i8 %sub, i8* %arrayidx11, align 1
1481  br label %for.inc
1482
1483for.inc:
1484  %inc = add nuw nsw i32 %ix.024, 1
1485  %exitcond = icmp eq i32 %inc, 1024
1486  br i1 %exitcond, label %for.end, label %for.body
1487
1488for.end:
1489  ret void
1490}
1491
1492; Full groups again, this time checking an Optsize scenario, with unknown trip-
1493; count, to check the behavior of folding-the-tail (folding the remainder loop
1494; into the main loop using masking) together with interleaved-groups.
1495; When masked-interleave-group is disabled the interleave-groups will be
1496; invalidated during Legality check, so nothing to check here.
1497; When masked-interleave-group is enabled we check that there is no epilogue,
1498; and that the interleave-groups are vectorized using proper masking (with
1499; shuffling of the mask feeding the wide masked load/store).
1500; The mask itself is an And of two masks: one that masks away the remainder
1501; iterations, and one that masks away the 'else' of the 'if' statement.
1502;
1503; void masked_strided2_unknown_tc(const unsigned char* restrict p,
1504;                     unsigned char* restrict q,
1505;                     unsigned char guard,
1506;                     int n) {
1507; for(ix=0; ix < n; ++ix) {
1508;     if (ix > guard) {
1509;         char left = p[2*ix];
1510;         char right = p[2*ix + 1];
1511;         char max = max(left, right);
1512;         q[2*ix] = max;
1513;         q[2*ix+1] = 0 - max;
1514;     }
1515; }
1516;}
1517;
1518define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %guard, i32 %n) local_unnamed_addr optsize {
1519; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc(
1520; DISABLED_MASKED_STRIDED-NEXT:  entry:
1521; DISABLED_MASKED_STRIDED-NEXT:    [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0
1522; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
1523; DISABLED_MASKED_STRIDED:       vector.ph:
1524; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
1525; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
1526; DISABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
1527; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
1528; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1529; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0
1530; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
1531; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1532; DISABLED_MASKED_STRIDED:       vector.body:
1533; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ]
1534; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE62]] ]
1535; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
1536; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1537; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1538; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
1539; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
1540; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1541; DISABLED_MASKED_STRIDED:       pred.load.if:
1542; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
1543; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]]
1544; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1
1545; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0
1546; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1547; DISABLED_MASKED_STRIDED:       pred.load.continue:
1548; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
1549; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
1550; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1551; DISABLED_MASKED_STRIDED:       pred.load.if3:
1552; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1
1553; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]]
1554; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1
1555; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1
1556; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1557; DISABLED_MASKED_STRIDED:       pred.load.continue4:
1558; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
1559; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
1560; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1561; DISABLED_MASKED_STRIDED:       pred.load.if5:
1562; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2
1563; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]]
1564; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1
1565; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2
1566; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1567; DISABLED_MASKED_STRIDED:       pred.load.continue6:
1568; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ]
1569; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
1570; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1571; DISABLED_MASKED_STRIDED:       pred.load.if7:
1572; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3
1573; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]]
1574; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1
1575; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3
1576; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1577; DISABLED_MASKED_STRIDED:       pred.load.continue8:
1578; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ]
1579; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
1580; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1581; DISABLED_MASKED_STRIDED:       pred.load.if9:
1582; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4
1583; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]]
1584; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1
1585; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4
1586; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1587; DISABLED_MASKED_STRIDED:       pred.load.continue10:
1588; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
1589; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
1590; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1591; DISABLED_MASKED_STRIDED:       pred.load.if11:
1592; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5
1593; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]]
1594; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1
1595; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5
1596; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1597; DISABLED_MASKED_STRIDED:       pred.load.continue12:
1598; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ]
1599; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
1600; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
1601; DISABLED_MASKED_STRIDED:       pred.load.if13:
1602; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6
1603; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]]
1604; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1
1605; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6
1606; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1607; DISABLED_MASKED_STRIDED:       pred.load.continue14:
1608; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ]
1609; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
1610; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
1611; DISABLED_MASKED_STRIDED:       pred.load.if15:
1612; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7
1613; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]]
1614; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1
1615; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7
1616; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
1617; DISABLED_MASKED_STRIDED:       pred.load.continue16:
1618; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
1619; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = or <8 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1620; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
1621; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
1622; DISABLED_MASKED_STRIDED:       pred.load.if17:
1623; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i32 0
1624; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP54]]
1625; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = load i8, i8* [[TMP55]], align 1
1626; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = insertelement <8 x i8> undef, i8 [[TMP56]], i32 0
1627; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
1628; DISABLED_MASKED_STRIDED:       pred.load.continue18:
1629; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE16]] ], [ [[TMP57]], [[PRED_LOAD_IF17]] ]
1630; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
1631; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
1632; DISABLED_MASKED_STRIDED:       pred.load.if19:
1633; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = extractelement <8 x i32> [[TMP52]], i32 1
1634; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP60]]
1635; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = load i8, i8* [[TMP61]], align 1
1636; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP62]], i32 1
1637; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
1638; DISABLED_MASKED_STRIDED:       pred.load.continue20:
1639; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = phi <8 x i8> [ [[TMP58]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP63]], [[PRED_LOAD_IF19]] ]
1640; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
1641; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP65]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
1642; DISABLED_MASKED_STRIDED:       pred.load.if21:
1643; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = extractelement <8 x i32> [[TMP52]], i32 2
1644; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP66]]
1645; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = load i8, i8* [[TMP67]], align 1
1646; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP68]], i32 2
1647; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
1648; DISABLED_MASKED_STRIDED:       pred.load.continue22:
1649; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = phi <8 x i8> [ [[TMP64]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP69]], [[PRED_LOAD_IF21]] ]
1650; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
1651; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP71]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
1652; DISABLED_MASKED_STRIDED:       pred.load.if23:
1653; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = extractelement <8 x i32> [[TMP52]], i32 3
1654; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP72]]
1655; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = load i8, i8* [[TMP73]], align 1
1656; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = insertelement <8 x i8> [[TMP70]], i8 [[TMP74]], i32 3
1657; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
1658; DISABLED_MASKED_STRIDED:       pred.load.continue24:
1659; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = phi <8 x i8> [ [[TMP70]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP75]], [[PRED_LOAD_IF23]] ]
1660; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
1661; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
1662; DISABLED_MASKED_STRIDED:       pred.load.if25:
1663; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = extractelement <8 x i32> [[TMP52]], i32 4
1664; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP78]]
1665; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = load i8, i8* [[TMP79]], align 1
1666; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = insertelement <8 x i8> [[TMP76]], i8 [[TMP80]], i32 4
1667; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
1668; DISABLED_MASKED_STRIDED:       pred.load.continue26:
1669; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = phi <8 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP81]], [[PRED_LOAD_IF25]] ]
1670; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
1671; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP83]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
1672; DISABLED_MASKED_STRIDED:       pred.load.if27:
1673; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = extractelement <8 x i32> [[TMP52]], i32 5
1674; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP84]]
1675; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = load i8, i8* [[TMP85]], align 1
1676; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = insertelement <8 x i8> [[TMP82]], i8 [[TMP86]], i32 5
1677; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
1678; DISABLED_MASKED_STRIDED:       pred.load.continue28:
1679; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = phi <8 x i8> [ [[TMP82]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP87]], [[PRED_LOAD_IF27]] ]
1680; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
1681; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
1682; DISABLED_MASKED_STRIDED:       pred.load.if29:
1683; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = extractelement <8 x i32> [[TMP52]], i32 6
1684; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP90]]
1685; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = load i8, i8* [[TMP91]], align 1
1686; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = insertelement <8 x i8> [[TMP88]], i8 [[TMP92]], i32 6
1687; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
1688; DISABLED_MASKED_STRIDED:       pred.load.continue30:
1689; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = phi <8 x i8> [ [[TMP88]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP93]], [[PRED_LOAD_IF29]] ]
1690; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
1691; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP95]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
1692; DISABLED_MASKED_STRIDED:       pred.load.if31:
1693; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = extractelement <8 x i32> [[TMP52]], i32 7
1694; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP96]]
1695; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = load i8, i8* [[TMP97]], align 1
1696; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = insertelement <8 x i8> [[TMP94]], i8 [[TMP98]], i32 7
1697; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
1698; DISABLED_MASKED_STRIDED:       pred.load.continue32:
1699; DISABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = phi <8 x i8> [ [[TMP94]], [[PRED_LOAD_CONTINUE30]] ], [ [[TMP99]], [[PRED_LOAD_IF31]] ]
1700; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = icmp slt <8 x i8> [[TMP51]], [[TMP100]]
1701; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = select <8 x i1> [[TMP101]], <8 x i8> [[TMP100]], <8 x i8> [[TMP51]]
1702; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
1703; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP103]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1704; DISABLED_MASKED_STRIDED:       pred.store.if:
1705; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0
1706; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP104]]
1707; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i8> [[TMP102]], i32 0
1708; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP106]], i8* [[TMP105]], align 1
1709; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
1710; DISABLED_MASKED_STRIDED:       pred.store.continue:
1711; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
1712; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP107]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
1713; DISABLED_MASKED_STRIDED:       pred.store.if33:
1714; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1
1715; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP108]]
1716; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i8> [[TMP102]], i32 1
1717; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP110]], i8* [[TMP109]], align 1
1718; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
1719; DISABLED_MASKED_STRIDED:       pred.store.continue34:
1720; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
1721; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP111]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
1722; DISABLED_MASKED_STRIDED:       pred.store.if35:
1723; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2
1724; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP112]]
1725; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i8> [[TMP102]], i32 2
1726; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP114]], i8* [[TMP113]], align 1
1727; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
1728; DISABLED_MASKED_STRIDED:       pred.store.continue36:
1729; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
1730; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP115]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
1731; DISABLED_MASKED_STRIDED:       pred.store.if37:
1732; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3
1733; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP116]]
1734; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i8> [[TMP102]], i32 3
1735; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP118]], i8* [[TMP117]], align 1
1736; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
1737; DISABLED_MASKED_STRIDED:       pred.store.continue38:
1738; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
1739; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP119]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
1740; DISABLED_MASKED_STRIDED:       pred.store.if39:
1741; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4
1742; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP120]]
1743; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i8> [[TMP102]], i32 4
1744; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP122]], i8* [[TMP121]], align 1
1745; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
1746; DISABLED_MASKED_STRIDED:       pred.store.continue40:
1747; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
1748; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP123]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
1749; DISABLED_MASKED_STRIDED:       pred.store.if41:
1750; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5
1751; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP124]]
1752; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i8> [[TMP102]], i32 5
1753; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP126]], i8* [[TMP125]], align 1
1754; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
1755; DISABLED_MASKED_STRIDED:       pred.store.continue42:
1756; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
1757; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP127]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
1758; DISABLED_MASKED_STRIDED:       pred.store.if43:
1759; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6
1760; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP128]]
1761; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i8> [[TMP102]], i32 6
1762; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP130]], i8* [[TMP129]], align 1
1763; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
1764; DISABLED_MASKED_STRIDED:       pred.store.continue44:
1765; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
1766; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP131]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
1767; DISABLED_MASKED_STRIDED:       pred.store.if45:
1768; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7
1769; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP132]]
1770; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i8> [[TMP102]], i32 7
1771; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP134]], i8* [[TMP133]], align 1
1772; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
1773; DISABLED_MASKED_STRIDED:       pred.store.continue46:
1774; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = sub <8 x i8> zeroinitializer, [[TMP102]]
1775; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
1776; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP136]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
1777; DISABLED_MASKED_STRIDED:       pred.store.if47:
1778; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i32> [[TMP52]], i32 0
1779; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP137]]
1780; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i8> [[TMP135]], i32 0
1781; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP139]], i8* [[TMP138]], align 1
1782; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
1783; DISABLED_MASKED_STRIDED:       pred.store.continue48:
1784; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
1785; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP140]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
1786; DISABLED_MASKED_STRIDED:       pred.store.if49:
1787; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i32> [[TMP52]], i32 1
1788; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP141]]
1789; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i8> [[TMP135]], i32 1
1790; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP143]], i8* [[TMP142]], align 1
1791; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
1792; DISABLED_MASKED_STRIDED:       pred.store.continue50:
1793; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
1794; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP144]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
1795; DISABLED_MASKED_STRIDED:       pred.store.if51:
1796; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i32> [[TMP52]], i32 2
1797; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP145]]
1798; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i8> [[TMP135]], i32 2
1799; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP147]], i8* [[TMP146]], align 1
1800; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
1801; DISABLED_MASKED_STRIDED:       pred.store.continue52:
1802; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
1803; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP148]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
1804; DISABLED_MASKED_STRIDED:       pred.store.if53:
1805; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i32> [[TMP52]], i32 3
1806; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP149]]
1807; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i8> [[TMP135]], i32 3
1808; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP151]], i8* [[TMP150]], align 1
1809; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
1810; DISABLED_MASKED_STRIDED:       pred.store.continue54:
1811; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
1812; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP152]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
1813; DISABLED_MASKED_STRIDED:       pred.store.if55:
1814; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i32> [[TMP52]], i32 4
1815; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP153]]
1816; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i8> [[TMP135]], i32 4
1817; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP155]], i8* [[TMP154]], align 1
1818; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
1819; DISABLED_MASKED_STRIDED:       pred.store.continue56:
1820; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
1821; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP156]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
1822; DISABLED_MASKED_STRIDED:       pred.store.if57:
1823; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i32> [[TMP52]], i32 5
1824; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP157]]
1825; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i8> [[TMP135]], i32 5
1826; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP159]], i8* [[TMP158]], align 1
1827; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
1828; DISABLED_MASKED_STRIDED:       pred.store.continue58:
1829; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
1830; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP160]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
1831; DISABLED_MASKED_STRIDED:       pred.store.if59:
1832; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i32> [[TMP52]], i32 6
1833; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP161]]
1834; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i8> [[TMP135]], i32 6
1835; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP163]], i8* [[TMP162]], align 1
1836; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
1837; DISABLED_MASKED_STRIDED:       pred.store.continue60:
1838; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
1839; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP164]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
1840; DISABLED_MASKED_STRIDED:       pred.store.if61:
1841; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = extractelement <8 x i32> [[TMP52]], i32 7
1842; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP165]]
1843; DISABLED_MASKED_STRIDED-NEXT:    [[TMP167:%.*]] = extractelement <8 x i8> [[TMP135]], i32 7
1844; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP167]], i8* [[TMP166]], align 1
1845; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE62]]
1846; DISABLED_MASKED_STRIDED:       pred.store.continue62:
1847; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1848; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1849; DISABLED_MASKED_STRIDED-NEXT:    [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1850; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8
1851; DISABLED_MASKED_STRIDED:       for.end:
1852; DISABLED_MASKED_STRIDED-NEXT:    ret void
1853;
1854; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc(
1855; ENABLED_MASKED_STRIDED-NEXT:  entry:
1856; ENABLED_MASKED_STRIDED-NEXT:    [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0
1857; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
1858; ENABLED_MASKED_STRIDED:       vector.ph:
1859; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
1860; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
1861; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
1862; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
1863; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1864; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0
1865; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
1866; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1867; ENABLED_MASKED_STRIDED:       vector.body:
1868; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1869; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1870; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
1871; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1872; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
1873; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
1874; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
1875; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
1876; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
1877; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
1878; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1879; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1880; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = or i32 [[TMP2]], 1
1881; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]]
1882; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]]
1883; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = sub <8 x i8> zeroinitializer, [[TMP8]]
1884; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1
1885; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i32 [[TMP6]]
1886; ENABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <16 x i8>*
1887; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> [[TMP9]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1888; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP12]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
1889; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
1890; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
1891; ENABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1892; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !10
1893; ENABLED_MASKED_STRIDED:       for.end:
1894; ENABLED_MASKED_STRIDED-NEXT:    ret void
1895;
1896entry:
1897  %cmp22 = icmp sgt i32 %n, 0
1898  br i1 %cmp22, label %for.body.preheader, label %for.end
1899
1900for.body.preheader:
1901  br label %for.body
1902
1903for.body:
1904  %ix.023 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
1905  %cmp1 = icmp sgt i32 %ix.023, %guard
1906  br i1 %cmp1, label %if.then, label %for.inc
1907
1908if.then:
1909  %mul = shl nuw nsw i32 %ix.023, 1
1910  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
1911  %0 = load i8, i8* %arrayidx, align 1
1912  %add = or i32 %mul, 1
1913  %arrayidx3 = getelementptr inbounds i8, i8* %p, i32 %add
1914  %1 = load i8, i8* %arrayidx3, align 1
1915  %cmp.i = icmp slt i8 %0, %1
1916  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
1917  %arrayidx5 = getelementptr inbounds i8, i8* %q, i32 %mul
1918  store i8 %spec.select.i, i8* %arrayidx5, align 1
1919  %sub = sub i8 0, %spec.select.i
1920  %arrayidx9 = getelementptr inbounds i8, i8* %q, i32 %add
1921  store i8 %sub, i8* %arrayidx9, align 1
1922  br label %for.inc
1923
1924for.inc:
1925  %inc = add nuw nsw i32 %ix.023, 1
1926  %exitcond = icmp eq i32 %inc, %n
1927  br i1 %exitcond, label %for.end.loopexit, label %for.body
1928
1929for.end.loopexit:
1930  br label %for.end
1931
1932for.end:
1933  ret void
1934}
1935
1936; Full groups under Optsize scenario again, with unknown trip-count, again in
1937; order to check the behavior of folding-the-tail (folding the remainder loop
1938; into the main loop using masking) together with interleaved-groups.
1939; This time the accesses are not conditional, they become conditional only
1940; due to tail folding.
1941; When masked-interleave-group is disabled the interleave-groups will be
1942; invalidated during cost-model checks, so we check for no epilogue and
1943; scalarized conditional accesses.
1944; When masked-interleave-group is enabled we check for no epilogue,
1945; and interleave-groups vectorized using proper masking (with
1946; shuffling of the mask feeding the wide masked load/store).
1947; (Same vectorization scheme as for the previous loop with conditional accesses
1948; except here the mask only masks away the remainder iterations.)
1949;
1950; void unconditional_masked_strided2_unknown_tc(const unsigned char* restrict p,
1951;                     unsigned char* restrict q,
1952;                     int n) {
1953; for(ix=0; ix < n; ++ix) {
1954;         char left = p[2*ix];
1955;         char right = p[2*ix + 1];
1956;         char max = max(left, right);
1957;         q[2*ix] = max;
1958;         q[2*ix+1] = 0 - max;
1959; }
1960;}
1961;
1962define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize {
1963; DISABLED_MASKED_STRIDED-LABEL: @unconditional_masked_strided2_unknown_tc(
1964; DISABLED_MASKED_STRIDED-NEXT:  entry:
1965; DISABLED_MASKED_STRIDED-NEXT:    [[CMP20:%.*]] = icmp sgt i32 [[N:%.*]], 0
1966; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP20]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
1967; DISABLED_MASKED_STRIDED:       vector.ph:
1968; DISABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
1969; DISABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
1970; DISABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
1971; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
1972; DISABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
1973; DISABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
1974; DISABLED_MASKED_STRIDED:       vector.body:
1975; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
1976; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
1977; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1978; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1979; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
1980; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1981; DISABLED_MASKED_STRIDED:       pred.load.if:
1982; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
1983; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]]
1984; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
1985; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0
1986; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1987; DISABLED_MASKED_STRIDED:       pred.load.continue:
1988; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
1989; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
1990; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1991; DISABLED_MASKED_STRIDED:       pred.load.if1:
1992; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
1993; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]]
1994; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1
1995; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1
1996; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1997; DISABLED_MASKED_STRIDED:       pred.load.continue2:
1998; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
1999; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
2000; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
2001; DISABLED_MASKED_STRIDED:       pred.load.if3:
2002; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
2003; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]]
2004; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1
2005; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2
2006; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
2007; DISABLED_MASKED_STRIDED:       pred.load.continue4:
2008; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ]
2009; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
2010; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
2011; DISABLED_MASKED_STRIDED:       pred.load.if5:
2012; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
2013; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]]
2014; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1
2015; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3
2016; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
2017; DISABLED_MASKED_STRIDED:       pred.load.continue6:
2018; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
2019; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
2020; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
2021; DISABLED_MASKED_STRIDED:       pred.load.if7:
2022; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
2023; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]]
2024; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1
2025; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4
2026; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
2027; DISABLED_MASKED_STRIDED:       pred.load.continue8:
2028; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ]
2029; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
2030; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
2031; DISABLED_MASKED_STRIDED:       pred.load.if9:
2032; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
2033; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]]
2034; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1
2035; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5
2036; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
2037; DISABLED_MASKED_STRIDED:       pred.load.continue10:
2038; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
2039; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
2040; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
2041; DISABLED_MASKED_STRIDED:       pred.load.if11:
2042; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
2043; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]]
2044; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1
2045; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6
2046; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
2047; DISABLED_MASKED_STRIDED:       pred.load.continue12:
2048; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ]
2049; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
2050; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
2051; DISABLED_MASKED_STRIDED:       pred.load.if13:
2052; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
2053; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]]
2054; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1
2055; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7
2056; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
2057; DISABLED_MASKED_STRIDED:       pred.load.continue14:
2058; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
2059; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2060; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
2061; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
2062; DISABLED_MASKED_STRIDED:       pred.load.if15:
2063; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
2064; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]]
2065; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1
2066; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0
2067; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
2068; DISABLED_MASKED_STRIDED:       pred.load.continue16:
2069; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ]
2070; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
2071; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
2072; DISABLED_MASKED_STRIDED:       pred.load.if17:
2073; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1
2074; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]]
2075; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1
2076; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i32 1
2077; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
2078; DISABLED_MASKED_STRIDED:       pred.load.continue18:
2079; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ]
2080; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
2081; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
2082; DISABLED_MASKED_STRIDED:       pred.load.if19:
2083; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2
2084; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]]
2085; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1
2086; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i32 2
2087; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
2088; DISABLED_MASKED_STRIDED:       pred.load.continue20:
2089; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ]
2090; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
2091; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
2092; DISABLED_MASKED_STRIDED:       pred.load.if21:
2093; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3
2094; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]]
2095; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1
2096; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i32 3
2097; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
2098; DISABLED_MASKED_STRIDED:       pred.load.continue22:
2099; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ]
2100; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
2101; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
2102; DISABLED_MASKED_STRIDED:       pred.load.if23:
2103; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4
2104; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]]
2105; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1
2106; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i32 4
2107; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
2108; DISABLED_MASKED_STRIDED:       pred.load.continue24:
2109; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ]
2110; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
2111; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
2112; DISABLED_MASKED_STRIDED:       pred.load.if25:
2113; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5
2114; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]]
2115; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1
2116; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i32 5
2117; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
2118; DISABLED_MASKED_STRIDED:       pred.load.continue26:
2119; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ]
2120; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
2121; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
2122; DISABLED_MASKED_STRIDED:       pred.load.if27:
2123; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6
2124; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]]
2125; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1
2126; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i32 6
2127; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
2128; DISABLED_MASKED_STRIDED:       pred.load.continue28:
2129; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ]
2130; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
2131; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
2132; DISABLED_MASKED_STRIDED:       pred.load.if29:
2133; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7
2134; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]]
2135; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1
2136; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i32 7
2137; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
2138; DISABLED_MASKED_STRIDED:       pred.load.continue30:
2139; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ]
2140; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]]
2141; DISABLED_MASKED_STRIDED-NEXT:    [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]]
2142; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
2143; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
2144; DISABLED_MASKED_STRIDED:       pred.store.if:
2145; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
2146; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]]
2147; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i32 0
2148; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP104]], i8* [[TMP103]], align 1
2149; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
2150; DISABLED_MASKED_STRIDED:       pred.store.continue:
2151; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
2152; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
2153; DISABLED_MASKED_STRIDED:       pred.store.if31:
2154; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1
2155; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]]
2156; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i32 1
2157; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP108]], i8* [[TMP107]], align 1
2158; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
2159; DISABLED_MASKED_STRIDED:       pred.store.continue32:
2160; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
2161; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
2162; DISABLED_MASKED_STRIDED:       pred.store.if33:
2163; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
2164; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]]
2165; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i32 2
2166; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP112]], i8* [[TMP111]], align 1
2167; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
2168; DISABLED_MASKED_STRIDED:       pred.store.continue34:
2169; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
2170; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
2171; DISABLED_MASKED_STRIDED:       pred.store.if35:
2172; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
2173; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]]
2174; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i32 3
2175; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP116]], i8* [[TMP115]], align 1
2176; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
2177; DISABLED_MASKED_STRIDED:       pred.store.continue36:
2178; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
2179; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
2180; DISABLED_MASKED_STRIDED:       pred.store.if37:
2181; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
2182; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]]
2183; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i32 4
2184; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP120]], i8* [[TMP119]], align 1
2185; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
2186; DISABLED_MASKED_STRIDED:       pred.store.continue38:
2187; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
2188; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
2189; DISABLED_MASKED_STRIDED:       pred.store.if39:
2190; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
2191; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]]
2192; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i32 5
2193; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP124]], i8* [[TMP123]], align 1
2194; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
2195; DISABLED_MASKED_STRIDED:       pred.store.continue40:
2196; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
2197; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
2198; DISABLED_MASKED_STRIDED:       pred.store.if41:
2199; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6
2200; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]]
2201; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i32 6
2202; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP128]], i8* [[TMP127]], align 1
2203; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
2204; DISABLED_MASKED_STRIDED:       pred.store.continue42:
2205; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
2206; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
2207; DISABLED_MASKED_STRIDED:       pred.store.if43:
2208; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
2209; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]]
2210; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i32 7
2211; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP132]], i8* [[TMP131]], align 1
2212; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
2213; DISABLED_MASKED_STRIDED:       pred.store.continue44:
2214; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]]
2215; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
2216; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
2217; DISABLED_MASKED_STRIDED:       pred.store.if45:
2218; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0
2219; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]]
2220; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i32 0
2221; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP137]], i8* [[TMP136]], align 1
2222; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
2223; DISABLED_MASKED_STRIDED:       pred.store.continue46:
2224; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
2225; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
2226; DISABLED_MASKED_STRIDED:       pred.store.if47:
2227; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1
2228; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]]
2229; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i32 1
2230; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP141]], i8* [[TMP140]], align 1
2231; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
2232; DISABLED_MASKED_STRIDED:       pred.store.continue48:
2233; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
2234; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
2235; DISABLED_MASKED_STRIDED:       pred.store.if49:
2236; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2
2237; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]]
2238; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i32 2
2239; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP145]], i8* [[TMP144]], align 1
2240; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
2241; DISABLED_MASKED_STRIDED:       pred.store.continue50:
2242; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
2243; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
2244; DISABLED_MASKED_STRIDED:       pred.store.if51:
2245; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3
2246; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]]
2247; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i32 3
2248; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP149]], i8* [[TMP148]], align 1
2249; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
2250; DISABLED_MASKED_STRIDED:       pred.store.continue52:
2251; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
2252; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
2253; DISABLED_MASKED_STRIDED:       pred.store.if53:
2254; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4
2255; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]]
2256; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i32 4
2257; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP153]], i8* [[TMP152]], align 1
2258; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
2259; DISABLED_MASKED_STRIDED:       pred.store.continue54:
2260; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
2261; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
2262; DISABLED_MASKED_STRIDED:       pred.store.if55:
2263; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5
2264; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]]
2265; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i32 5
2266; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP157]], i8* [[TMP156]], align 1
2267; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
2268; DISABLED_MASKED_STRIDED:       pred.store.continue56:
2269; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
2270; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
2271; DISABLED_MASKED_STRIDED:       pred.store.if57:
2272; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6
2273; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]]
2274; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i32 6
2275; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP161]], i8* [[TMP160]], align 1
2276; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
2277; DISABLED_MASKED_STRIDED:       pred.store.continue58:
2278; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
2279; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
2280; DISABLED_MASKED_STRIDED:       pred.store.if59:
2281; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7
2282; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]]
2283; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i32 7
2284; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP165]], i8* [[TMP164]], align 1
2285; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
2286; DISABLED_MASKED_STRIDED:       pred.store.continue60:
2287; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
2288; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
2289; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2290; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !9
2291; DISABLED_MASKED_STRIDED:       for.end:
2292; DISABLED_MASKED_STRIDED-NEXT:    ret void
2293;
2294; ENABLED_MASKED_STRIDED-LABEL: @unconditional_masked_strided2_unknown_tc(
2295; ENABLED_MASKED_STRIDED-NEXT:  entry:
2296; ENABLED_MASKED_STRIDED-NEXT:    [[CMP20:%.*]] = icmp sgt i32 [[N:%.*]], 0
2297; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP20]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]]
2298; ENABLED_MASKED_STRIDED:       vector.ph:
2299; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
2300; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
2301; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
2302; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
2303; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
2304; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
2305; ENABLED_MASKED_STRIDED:       vector.body:
2306; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2307; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
2308; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
2309; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2310; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]]
2311; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
2312; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]]
2313; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
2314; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
2315; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef)
2316; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
2317; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
2318; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], 1
2319; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]]
2320; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]]
2321; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]]
2322; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1
2323; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]]
2324; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
2325; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
2326; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
2327; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
2328; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2329; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !11
2330; ENABLED_MASKED_STRIDED:       for.end:
2331; ENABLED_MASKED_STRIDED-NEXT:    ret void
2332;
2333entry:
2334  %cmp20 = icmp sgt i32 %n, 0
2335  br i1 %cmp20, label %for.body.preheader, label %for.end
2336
2337for.body.preheader:
2338  br label %for.body
2339
2340for.body:
2341  %ix.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
2342  %mul = shl nuw nsw i32 %ix.021, 1
2343  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
2344  %0 = load i8, i8* %arrayidx, align 1
2345  %add = or i32 %mul, 1
2346  %arrayidx2 = getelementptr inbounds i8, i8* %p, i32 %add
2347  %1 = load i8, i8* %arrayidx2, align 1
2348  %cmp.i = icmp slt i8 %0, %1
2349  %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
2350  %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul
2351  store i8 %spec.select.i, i8* %arrayidx4, align 1
2352  %sub = sub i8 0, %spec.select.i
2353  %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add
2354  store i8 %sub, i8* %arrayidx8, align 1
2355  %inc = add nuw nsw i32 %ix.021, 1
2356  %exitcond = icmp eq i32 %inc, %n
2357  br i1 %exitcond, label %for.end.loopexit, label %for.body
2358
2359for.end.loopexit:
2360  br label %for.end
2361
2362for.end:
2363  ret void
2364}
2365
2366