1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED 3; RUN: opt -mcpu=skx -S -loop-vectorize -instcombine -simplifycfg -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED 4 5target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" 6target triple = "i386-unknown-linux-gnu" 7 8; When masked-interleaved-groups are disabled: 9; Check that the predicated load is not vectorized as an 10; interleaved-group but rather as a scalarized accesses. 11; (For SKX, Gather is not supported by the compiler for chars, therefore 12; the only remaining alternative is to scalarize). 13; In this case a scalar epilogue is not needed. 14; 15; When masked-interleave-group is enabled we expect to find the proper mask 16; shuffling code, feeding the wide masked load for an interleave-group (with 17; a single member). 18; Since the last (second) member of the load-group is a gap, peeling is used, 19; so we also expect to find a scalar epilogue loop. 20; 21; void masked_strided1(const unsigned char* restrict p, 22; unsigned char* restrict q, 23; unsigned char guard) { 24; for(ix=0; ix < 1024; ++ix) { 25; if (ix > guard) { 26; char t = p[2*ix]; 27; q[ix] = t; 28; } 29; } 30; } 31 32define dso_local void @masked_strided1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr { 33; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1( 34; DISABLED_MASKED_STRIDED-NEXT: entry: 35; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 36; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 37; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 38; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 39; DISABLED_MASKED_STRIDED: vector.body: 40; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] 41; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] 42; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 43; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 44; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 45; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 46; DISABLED_MASKED_STRIDED: pred.load.if: 47; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 48; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] 49; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 50; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0 51; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 52; DISABLED_MASKED_STRIDED: pred.load.continue: 53; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 54; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 55; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 56; DISABLED_MASKED_STRIDED: pred.load.if1: 57; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 58; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 59; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1 60; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1 61; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] 62; DISABLED_MASKED_STRIDED: pred.load.continue2: 63; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 64; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 65; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 66; DISABLED_MASKED_STRIDED: pred.load.if3: 67; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 68; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 69; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1 70; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2 71; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 72; DISABLED_MASKED_STRIDED: pred.load.continue4: 73; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 74; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 75; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 76; DISABLED_MASKED_STRIDED: pred.load.if5: 77; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 78; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]] 79; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 80; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3 81; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 82; DISABLED_MASKED_STRIDED: pred.load.continue6: 83; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 84; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 85; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 86; DISABLED_MASKED_STRIDED: pred.load.if7: 87; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 88; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]] 89; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1 90; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4 91; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 92; DISABLED_MASKED_STRIDED: pred.load.continue8: 93; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] 94; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 95; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 96; DISABLED_MASKED_STRIDED: pred.load.if9: 97; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 98; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]] 99; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1 100; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5 101; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 102; DISABLED_MASKED_STRIDED: pred.load.continue10: 103; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] 104; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 105; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 106; DISABLED_MASKED_STRIDED: pred.load.if11: 107; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 108; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]] 109; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1 110; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6 111; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 112; DISABLED_MASKED_STRIDED: pred.load.continue12: 113; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 114; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 115; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] 116; DISABLED_MASKED_STRIDED: pred.load.if13: 117; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 118; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]] 119; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 120; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7 121; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 122; DISABLED_MASKED_STRIDED: pred.load.continue14: 123; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] 124; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 125; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>* 126; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]]) 127; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 128; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 129; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 130; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 131; DISABLED_MASKED_STRIDED: for.end: 132; DISABLED_MASKED_STRIDED-NEXT: ret void 133; 134; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1( 135; ENABLED_MASKED_STRIDED-NEXT: entry: 136; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 137; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 138; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 139; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 140; ENABLED_MASKED_STRIDED: vector.body: 141; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 142; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 143; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 144; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 145; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 146; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* 147; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 148; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) 149; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 150; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 151; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <8 x i8>* 152; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP5]], i32 1, <8 x i1> [[TMP0]]) 153; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 154; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 155; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016 156; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 157; ENABLED_MASKED_STRIDED: for.body: 158; ENABLED_MASKED_STRIDED-NEXT: [[IX_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1016, [[VECTOR_BODY]] ] 159; ENABLED_MASKED_STRIDED-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[IX_09]], [[CONV]] 160; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] 161; ENABLED_MASKED_STRIDED: if.then: 162; ENABLED_MASKED_STRIDED-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_09]], 1 163; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[MUL]] 164; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 165; ENABLED_MASKED_STRIDED-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[IX_09]] 166; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP7]], i8* [[ARRAYIDX3]], align 1 167; ENABLED_MASKED_STRIDED-NEXT: br label [[FOR_INC]] 168; ENABLED_MASKED_STRIDED: for.inc: 169; ENABLED_MASKED_STRIDED-NEXT: [[INC]] = add nuw nsw i32 [[IX_09]], 1 170; ENABLED_MASKED_STRIDED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 171; ENABLED_MASKED_STRIDED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop !2 172; ENABLED_MASKED_STRIDED: for.end: 173; ENABLED_MASKED_STRIDED-NEXT: ret void 174; 175entry: 176 %conv = zext i8 %guard to i32 177 br label %for.body 178 179for.body: 180 %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] 181 %cmp1 = icmp ugt i32 %ix.09, %conv 182 br i1 %cmp1, label %if.then, label %for.inc 183 184if.then: 185 %mul = shl nuw nsw i32 %ix.09, 1 186 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 187 %0 = load i8, i8* %arrayidx, align 1 188 %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09 189 store i8 %0, i8* %arrayidx3, align 1 190 br label %for.inc 191 192for.inc: 193 %inc = add nuw nsw i32 %ix.09, 1 194 %exitcond = icmp eq i32 %inc, 1024 195 br i1 %exitcond, label %for.end, label %for.body 196 197for.end: 198 ret void 199} 200 201; Exactly the same scenario except we are now optimizing for size, therefore 202; we check that no scalar epilogue is created. Since we can't create an epilog 203; we need the ability to mask out the gaps. 204; When enable-masked-interleaved-access is enabled, the interleave-groups will 205; be vectorized with masked wide-loads with the mask properly shuffled and 206; And-ed with the gaps mask. 207; 208define dso_local void @masked_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize { 209; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize( 210; DISABLED_MASKED_STRIDED-NEXT: entry: 211; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 212; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 213; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 214; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 215; DISABLED_MASKED_STRIDED: vector.body: 216; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] 217; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] 218; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 219; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 220; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 221; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 222; DISABLED_MASKED_STRIDED: pred.load.if: 223; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 224; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] 225; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 226; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0 227; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 228; DISABLED_MASKED_STRIDED: pred.load.continue: 229; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 230; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 231; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 232; DISABLED_MASKED_STRIDED: pred.load.if1: 233; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 234; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 235; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1 236; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1 237; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] 238; DISABLED_MASKED_STRIDED: pred.load.continue2: 239; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 240; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 241; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 242; DISABLED_MASKED_STRIDED: pred.load.if3: 243; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 244; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 245; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1 246; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2 247; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 248; DISABLED_MASKED_STRIDED: pred.load.continue4: 249; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 250; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 251; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 252; DISABLED_MASKED_STRIDED: pred.load.if5: 253; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 254; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]] 255; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 256; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3 257; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 258; DISABLED_MASKED_STRIDED: pred.load.continue6: 259; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 260; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 261; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 262; DISABLED_MASKED_STRIDED: pred.load.if7: 263; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 264; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]] 265; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1 266; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4 267; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 268; DISABLED_MASKED_STRIDED: pred.load.continue8: 269; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] 270; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 271; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 272; DISABLED_MASKED_STRIDED: pred.load.if9: 273; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 274; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]] 275; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1 276; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5 277; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 278; DISABLED_MASKED_STRIDED: pred.load.continue10: 279; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] 280; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 281; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 282; DISABLED_MASKED_STRIDED: pred.load.if11: 283; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 284; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]] 285; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1 286; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6 287; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 288; DISABLED_MASKED_STRIDED: pred.load.continue12: 289; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 290; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 291; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] 292; DISABLED_MASKED_STRIDED: pred.load.if13: 293; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 294; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]] 295; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 296; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7 297; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 298; DISABLED_MASKED_STRIDED: pred.load.continue14: 299; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] 300; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 301; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>* 302; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]]) 303; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 304; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 305; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 306; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !2 307; DISABLED_MASKED_STRIDED: for.end: 308; DISABLED_MASKED_STRIDED-NEXT: ret void 309; 310; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize( 311; ENABLED_MASKED_STRIDED-NEXT: entry: 312; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 313; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 314; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 315; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 316; ENABLED_MASKED_STRIDED: vector.body: 317; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 318; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 319; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 320; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 321; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 322; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* 323; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 324; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 325; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef) 326; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 327; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 328; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>* 329; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]]) 330; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 331; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 332; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 333; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 334; ENABLED_MASKED_STRIDED: for.end: 335; ENABLED_MASKED_STRIDED-NEXT: ret void 336; 337entry: 338 %conv = zext i8 %guard to i32 339 br label %for.body 340 341for.body: 342 %ix.09 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] 343 %cmp1 = icmp ugt i32 %ix.09, %conv 344 br i1 %cmp1, label %if.then, label %for.inc 345 346if.then: 347 %mul = shl nuw nsw i32 %ix.09, 1 348 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 349 %0 = load i8, i8* %arrayidx, align 1 350 %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.09 351 store i8 %0, i8* %arrayidx3, align 1 352 br label %for.inc 353 354for.inc: 355 %inc = add nuw nsw i32 %ix.09, 1 356 %exitcond = icmp eq i32 %inc, 1024 357 br i1 %exitcond, label %for.end, label %for.body 358 359for.end: 360 ret void 361} 362 363 364; Accesses with gaps under Optsize scenario again, with unknown trip-count 365; this time, in order to check the behavior of folding-the-tail (folding the 366; remainder loop into the main loop using masking) together with interleaved- 367; groups. 368; When masked-interleave-group is disabled the interleave-groups will be 369; invalidated during Legality checks; So there we check for no epilogue 370; and for scalarized conditional accesses. 371; When masked-interleave-group is enabled we check that there is no epilogue, 372; and that the interleave-groups are vectorized using proper masking (with 373; shuffling of the mask feeding the wide masked load/store). 374; The mask itself is an And of two masks: one that masks away the remainder 375; iterations, and one that masks away the 'else' of the 'if' statement. 376; The shuffled mask is also And-ed with the gaps mask. 377; 378; void masked_strided1_optsize_unknown_tc(const unsigned char* restrict p, 379; unsigned char* restrict q, 380; unsigned char guard, 381; int n) { 382; for(ix=0; ix < n; ++ix) { 383; if (ix > guard) { 384; char t = p[2*ix]; 385; q[ix] = t; 386; } 387; } 388; } 389; 390define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize { 391; DISABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize_unknown_tc( 392; DISABLED_MASKED_STRIDED-NEXT: entry: 393; DISABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 394; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 395; DISABLED_MASKED_STRIDED: vector.ph: 396; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 397; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 398; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 399; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 400; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 401; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 402; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 403; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 404; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 405; DISABLED_MASKED_STRIDED: vector.body: 406; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ] 407; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ] 408; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 409; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 410; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 411; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 412; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 413; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 414; DISABLED_MASKED_STRIDED: pred.load.if: 415; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 416; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]] 417; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1 418; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0 419; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 420; DISABLED_MASKED_STRIDED: pred.load.continue: 421; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] 422; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 423; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 424; DISABLED_MASKED_STRIDED: pred.load.if3: 425; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 426; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]] 427; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 428; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1 429; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 430; DISABLED_MASKED_STRIDED: pred.load.continue4: 431; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] 432; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 433; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 434; DISABLED_MASKED_STRIDED: pred.load.if5: 435; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 436; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]] 437; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1 438; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2 439; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 440; DISABLED_MASKED_STRIDED: pred.load.continue6: 441; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ] 442; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 443; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 444; DISABLED_MASKED_STRIDED: pred.load.if7: 445; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 446; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]] 447; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1 448; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3 449; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 450; DISABLED_MASKED_STRIDED: pred.load.continue8: 451; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ] 452; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 453; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 454; DISABLED_MASKED_STRIDED: pred.load.if9: 455; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 456; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]] 457; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1 458; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4 459; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 460; DISABLED_MASKED_STRIDED: pred.load.continue10: 461; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] 462; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 463; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 464; DISABLED_MASKED_STRIDED: pred.load.if11: 465; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 466; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]] 467; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1 468; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5 469; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 470; DISABLED_MASKED_STRIDED: pred.load.continue12: 471; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ] 472; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 473; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] 474; DISABLED_MASKED_STRIDED: pred.load.if13: 475; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 476; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]] 477; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1 478; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6 479; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 480; DISABLED_MASKED_STRIDED: pred.load.continue14: 481; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ] 482; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 483; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]] 484; DISABLED_MASKED_STRIDED: pred.load.if15: 485; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 486; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]] 487; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1 488; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7 489; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] 490; DISABLED_MASKED_STRIDED: pred.load.continue16: 491; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] 492; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 493; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>* 494; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]]) 495; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 496; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 497; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 498; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !3 499; DISABLED_MASKED_STRIDED: for.end: 500; DISABLED_MASKED_STRIDED-NEXT: ret void 501; 502; ENABLED_MASKED_STRIDED-LABEL: @masked_strided1_optsize_unknown_tc( 503; ENABLED_MASKED_STRIDED-NEXT: entry: 504; ENABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 505; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 506; ENABLED_MASKED_STRIDED: vector.ph: 507; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 508; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 509; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 510; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 511; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 512; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 513; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 514; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 515; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 516; ENABLED_MASKED_STRIDED: vector.body: 517; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 518; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 519; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 520; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 521; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 522; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] 523; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 524; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* 525; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 526; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 527; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[TMP6]], <16 x i8> undef) 528; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 529; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 530; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>* 531; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]]) 532; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 533; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 534; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 535; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !5 536; ENABLED_MASKED_STRIDED: for.end: 537; ENABLED_MASKED_STRIDED-NEXT: ret void 538; 539entry: 540 %cmp9 = icmp sgt i32 %n, 0 541 br i1 %cmp9, label %for.body.lr.ph, label %for.end 542 543for.body.lr.ph: 544 %conv = zext i8 %guard to i32 545 br label %for.body 546 547for.body: 548 %ix.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] 549 %cmp1 = icmp ugt i32 %ix.010, %conv 550 br i1 %cmp1, label %if.then, label %for.inc 551 552if.then: 553 %mul = shl nuw nsw i32 %ix.010, 1 554 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 555 %0 = load i8, i8* %arrayidx, align 1 556 %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010 557 store i8 %0, i8* %arrayidx3, align 1 558 br label %for.inc 559 560for.inc: 561 %inc = add nuw nsw i32 %ix.010, 1 562 %exitcond = icmp eq i32 %inc, %n 563 br i1 %exitcond, label %for.end.loopexit, label %for.body 564 565for.end.loopexit: 566 br label %for.end 567 568for.end: 569 ret void 570} 571 572; Same, with stride 3. This is to check the gaps-mask and the shuffled mask 573; with a different stride. 574; So accesses are with gaps under Optsize scenario again, with unknown trip- 575; count, in order to check the behavior of folding-the-tail (folding the 576; remainder loop into the main loop using masking) together with interleaved- 577; groups. 578; When masked-interleave-group is enabled we check that there is no epilogue, 579; and that the interleave-groups are vectorized using proper masking (with 580; shuffling of the mask feeding the wide masked load/store). 581; The mask itself is an And of two masks: one that masks away the remainder 582; iterations, and one that masks away the 'else' of the 'if' statement. 583; The shuffled mask is also And-ed with the gaps mask. 584; 585; void masked_strided3_optsize_unknown_tc(const unsigned char* restrict p, 586; unsigned char* restrict q, 587; unsigned char guard, 588; int n) { 589; for(ix=0; ix < n; ++ix) { 590; if (ix > guard) { 591; char t = p[3*ix]; 592; q[ix] = t; 593; } 594; } 595; } 596; 597define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard, i32 %n) local_unnamed_addr optsize { 598; DISABLED_MASKED_STRIDED-LABEL: @masked_strided3_optsize_unknown_tc( 599; DISABLED_MASKED_STRIDED-NEXT: entry: 600; DISABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 601; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 602; DISABLED_MASKED_STRIDED: vector.ph: 603; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 604; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 605; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 606; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 607; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 608; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 609; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 610; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 611; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 612; DISABLED_MASKED_STRIDED: vector.body: 613; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE16:%.*]] ] 614; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE16]] ] 615; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 616; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 617; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 618; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 619; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 620; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 621; DISABLED_MASKED_STRIDED: pred.load.if: 622; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 623; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]] 624; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1 625; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0 626; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 627; DISABLED_MASKED_STRIDED: pred.load.continue: 628; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] 629; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 630; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 631; DISABLED_MASKED_STRIDED: pred.load.if3: 632; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 633; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]] 634; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 635; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1 636; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 637; DISABLED_MASKED_STRIDED: pred.load.continue4: 638; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] 639; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 640; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 641; DISABLED_MASKED_STRIDED: pred.load.if5: 642; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 643; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]] 644; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1 645; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2 646; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 647; DISABLED_MASKED_STRIDED: pred.load.continue6: 648; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ] 649; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 650; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 651; DISABLED_MASKED_STRIDED: pred.load.if7: 652; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 653; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]] 654; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1 655; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3 656; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 657; DISABLED_MASKED_STRIDED: pred.load.continue8: 658; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ] 659; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 660; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 661; DISABLED_MASKED_STRIDED: pred.load.if9: 662; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 663; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]] 664; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1 665; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4 666; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 667; DISABLED_MASKED_STRIDED: pred.load.continue10: 668; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] 669; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 670; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 671; DISABLED_MASKED_STRIDED: pred.load.if11: 672; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 673; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]] 674; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1 675; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5 676; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 677; DISABLED_MASKED_STRIDED: pred.load.continue12: 678; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ] 679; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 680; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] 681; DISABLED_MASKED_STRIDED: pred.load.if13: 682; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 683; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]] 684; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1 685; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6 686; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 687; DISABLED_MASKED_STRIDED: pred.load.continue14: 688; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ] 689; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 690; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]] 691; DISABLED_MASKED_STRIDED: pred.load.if15: 692; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 693; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]] 694; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1 695; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7 696; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] 697; DISABLED_MASKED_STRIDED: pred.load.continue16: 698; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] 699; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 700; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP52]] to <8 x i8>* 701; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP51]], <8 x i8>* [[TMP53]], i32 1, <8 x i1> [[TMP3]]) 702; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 703; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 704; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 705; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP54]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !4 706; DISABLED_MASKED_STRIDED: for.end: 707; DISABLED_MASKED_STRIDED-NEXT: ret void 708; 709; ENABLED_MASKED_STRIDED-LABEL: @masked_strided3_optsize_unknown_tc( 710; ENABLED_MASKED_STRIDED-NEXT: entry: 711; ENABLED_MASKED_STRIDED-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 712; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP9]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 713; ENABLED_MASKED_STRIDED: vector.ph: 714; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 715; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 716; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 717; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 718; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 719; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 720; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 721; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 722; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 723; ENABLED_MASKED_STRIDED: vector.body: 724; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 725; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 726; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 727; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 728; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = mul nsw i32 [[INDEX]], 3 729; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] 730; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 731; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>* 732; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7> 733; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false> 734; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <24 x i8> @llvm.masked.load.v24i8.p0v24i8(<24 x i8>* [[TMP5]], i32 1, <24 x i1> [[TMP6]], <24 x i8> undef) 735; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 736; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 737; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <8 x i8>* 738; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP8]], i32 1, <8 x i1> [[TMP4]]) 739; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 740; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 741; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 742; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP9]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 743; ENABLED_MASKED_STRIDED: for.end: 744; ENABLED_MASKED_STRIDED-NEXT: ret void 745; 746entry: 747 %cmp9 = icmp sgt i32 %n, 0 748 br i1 %cmp9, label %for.body.lr.ph, label %for.end 749 750for.body.lr.ph: 751 %conv = zext i8 %guard to i32 752 br label %for.body 753 754for.body: 755 %ix.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] 756 %cmp1 = icmp ugt i32 %ix.010, %conv 757 br i1 %cmp1, label %if.then, label %for.inc 758 759if.then: 760 %mul = mul nsw i32 %ix.010, 3 761 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 762 %0 = load i8, i8* %arrayidx, align 1 763 %arrayidx3 = getelementptr inbounds i8, i8* %q, i32 %ix.010 764 store i8 %0, i8* %arrayidx3, align 1 765 br label %for.inc 766 767for.inc: 768 %inc = add nuw nsw i32 %ix.010, 1 769 %exitcond = icmp eq i32 %inc, %n 770 br i1 %exitcond, label %for.end.loopexit, label %for.body 771 772for.end.loopexit: 773 br label %for.end 774 775for.end: 776 ret void 777} 778 779 780; Back to stride 2 with gaps with a known trip count under opt for size, 781; but this time the load/store are not predicated. 782; When enable-masked-interleaved-access is disabled, the interleave-groups will 783; be invalidated during cost-model checks because we have gaps and we can't 784; create an epilog. The access is thus scalarized. 785; (Before the fix that this test checks, we used to create an epilogue despite 786; optsize, and vectorized the access as an interleaved-group. This is now fixed, 787; and we make sure that a scalar epilogue does not exist). 788; When enable-masked-interleaved-access is enabled, the interleave-groups will 789; be vectorized with masked wide-loads (masking away the gaps). 790; 791; void unconditional_strided1_optsize(const unsigned char* restrict p, 792; unsigned char* restrict q, 793; unsigned char guard) { 794; for(ix=0; ix < 1024; ++ix) { 795; char t = p[2*ix]; 796; q[ix] = t; 797; } 798; } 799; 800define dso_local void @unconditional_strided1_optsize(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr optsize { 801; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize( 802; DISABLED_MASKED_STRIDED-NEXT: entry: 803; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 804; DISABLED_MASKED_STRIDED: vector.body: 805; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 806; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 807; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 808; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i32 0 809; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 810; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i32 1 811; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP3]] 812; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP0]], i32 2 813; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP5]] 814; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP0]], i32 3 815; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP7]] 816; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i32 4 817; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 818; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP0]], i32 5 819; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]] 820; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP0]], i32 6 821; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP13]] 822; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i32 7 823; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 824; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP2]], align 1 825; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP4]], align 1 826; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = load i8, i8* [[TMP6]], align 1 827; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = load i8, i8* [[TMP8]], align 1 828; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP10]], align 1 829; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP12]], align 1 830; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP14]], align 1 831; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP16]], align 1 832; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = insertelement <8 x i8> undef, i8 [[TMP17]], i32 0 833; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP18]], i32 1 834; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = insertelement <8 x i8> [[TMP26]], i8 [[TMP19]], i32 2 835; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP20]], i32 3 836; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = insertelement <8 x i8> [[TMP28]], i8 [[TMP21]], i32 4 837; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP29]], i8 [[TMP22]], i32 5 838; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = insertelement <8 x i8> [[TMP30]], i8 [[TMP23]], i32 6 839; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP24]], i32 7 840; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 841; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = bitcast i8* [[TMP33]] to <8 x i8>* 842; DISABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[TMP32]], <8 x i8>* [[TMP34]], align 1 843; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 844; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 845; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 846; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP35]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 847; DISABLED_MASKED_STRIDED: for.end: 848; DISABLED_MASKED_STRIDED-NEXT: ret void 849; 850; ENABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize( 851; ENABLED_MASKED_STRIDED-NEXT: entry: 852; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 853; ENABLED_MASKED_STRIDED: vector.body: 854; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 855; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1 856; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]] 857; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <16 x i8>* 858; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP2]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> undef) 859; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 860; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 861; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <8 x i8>* 862; ENABLED_MASKED_STRIDED-NEXT: store <8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP4]], align 1 863; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 864; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 865; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 866; ENABLED_MASKED_STRIDED: for.end: 867; ENABLED_MASKED_STRIDED-NEXT: ret void 868; 869entry: 870 br label %for.body 871 872for.body: 873 %ix.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 874 %mul = shl nuw nsw i32 %ix.06, 1 875 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 876 %0 = load i8, i8* %arrayidx, align 1 877 %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.06 878 store i8 %0, i8* %arrayidx1, align 1 879 %inc = add nuw nsw i32 %ix.06, 1 880 %exitcond = icmp eq i32 %inc, 1024 881 br i1 %exitcond, label %for.end, label %for.body 882 883for.end: 884 ret void 885} 886 887 888 889; Unconditioal accesses with gaps under Optsize scenario again, with unknown 890; trip-count this time, in order to check the behavior of folding-the-tail 891; (folding the remainder loop into the main loop using masking) together with 892; interleaved-groups. Folding-the-tail turns the accesses to conditional which 893; requires proper masking. In addition we need to mask out the gaps (all 894; because we are not allowed to use an epilog due to optsize). 895; When enable-masked-interleaved-access is disabled, the interleave-groups will 896; be invalidated during cost-model checks. So there we check for no epilogue 897; and for scalarized conditional accesses. 898; When masked-interleave-group is enabled we check that there is no epilogue, 899; and that the interleave-groups are vectorized using proper masking (with 900; shuffling of the mask feeding the wide masked load/store). 901; The shuffled mask is also And-ed with the gaps mask. 902; 903; for(ix=0; ix < n; ++ix) { 904; char t = p[2*ix]; 905; q[ix] = t; 906; } 907; 908define dso_local void @unconditional_strided1_optsize_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize { 909; DISABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize_unknown_tc( 910; DISABLED_MASKED_STRIDED-NEXT: entry: 911; DISABLED_MASKED_STRIDED-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 912; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 913; DISABLED_MASKED_STRIDED: vector.ph: 914; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 915; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 916; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 917; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 918; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 919; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 920; DISABLED_MASKED_STRIDED: vector.body: 921; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] 922; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] 923; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 924; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 925; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 926; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 927; DISABLED_MASKED_STRIDED: pred.load.if: 928; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 929; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] 930; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 931; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0 932; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 933; DISABLED_MASKED_STRIDED: pred.load.continue: 934; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 935; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 936; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 937; DISABLED_MASKED_STRIDED: pred.load.if1: 938; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 939; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 940; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1 941; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1 942; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] 943; DISABLED_MASKED_STRIDED: pred.load.continue2: 944; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 945; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 946; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 947; DISABLED_MASKED_STRIDED: pred.load.if3: 948; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 949; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 950; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1 951; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2 952; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 953; DISABLED_MASKED_STRIDED: pred.load.continue4: 954; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 955; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 956; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 957; DISABLED_MASKED_STRIDED: pred.load.if5: 958; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 959; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]] 960; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 961; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3 962; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 963; DISABLED_MASKED_STRIDED: pred.load.continue6: 964; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 965; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 966; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 967; DISABLED_MASKED_STRIDED: pred.load.if7: 968; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 969; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]] 970; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1 971; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4 972; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 973; DISABLED_MASKED_STRIDED: pred.load.continue8: 974; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] 975; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 976; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 977; DISABLED_MASKED_STRIDED: pred.load.if9: 978; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 979; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]] 980; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1 981; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5 982; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 983; DISABLED_MASKED_STRIDED: pred.load.continue10: 984; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] 985; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 986; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 987; DISABLED_MASKED_STRIDED: pred.load.if11: 988; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 989; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]] 990; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1 991; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6 992; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 993; DISABLED_MASKED_STRIDED: pred.load.continue12: 994; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 995; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 996; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] 997; DISABLED_MASKED_STRIDED: pred.load.if13: 998; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 999; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]] 1000; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 1001; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7 1002; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 1003; DISABLED_MASKED_STRIDED: pred.load.continue14: 1004; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] 1005; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 1006; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = bitcast i8* [[TMP50]] to <8 x i8>* 1007; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[TMP49]], <8 x i8>* [[TMP51]], i32 1, <8 x i1> [[TMP0]]) 1008; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1009; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1010; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 1011; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP52]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !6 1012; DISABLED_MASKED_STRIDED: for.end: 1013; DISABLED_MASKED_STRIDED-NEXT: ret void 1014; 1015; ENABLED_MASKED_STRIDED-LABEL: @unconditional_strided1_optsize_unknown_tc( 1016; ENABLED_MASKED_STRIDED-NEXT: entry: 1017; ENABLED_MASKED_STRIDED-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 1018; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 1019; ENABLED_MASKED_STRIDED: vector.ph: 1020; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 1021; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 1022; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 1023; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 1024; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1025; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1026; ENABLED_MASKED_STRIDED: vector.body: 1027; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1028; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0 1029; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 1030; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1031; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] 1032; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 1033; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 1034; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* 1035; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 1036; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 1037; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[TMP4]], <16 x i8> undef) 1038; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1039; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[INDEX]] 1040; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <8 x i8>* 1041; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8>* [[TMP6]], i32 1, <8 x i1> [[TMP0]]) 1042; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1043; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 1044; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 1045; ENABLED_MASKED_STRIDED: for.end: 1046; ENABLED_MASKED_STRIDED-NEXT: ret void 1047; 1048entry: 1049 %cmp6 = icmp sgt i32 %n, 0 1050 br i1 %cmp6, label %for.body.preheader, label %for.end 1051 1052for.body.preheader: 1053 br label %for.body 1054 1055for.body: 1056 %ix.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] 1057 %mul = shl nuw nsw i32 %ix.07, 1 1058 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 1059 %0 = load i8, i8* %arrayidx, align 1 1060 %arrayidx1 = getelementptr inbounds i8, i8* %q, i32 %ix.07 1061 store i8 %0, i8* %arrayidx1, align 1 1062 %inc = add nuw nsw i32 %ix.07, 1 1063 %exitcond = icmp eq i32 %inc, %n 1064 br i1 %exitcond, label %for.end.loopexit, label %for.body 1065 1066for.end.loopexit: 1067 br label %for.end 1068 1069for.end: 1070 ret void 1071} 1072 1073 1074; Check also a scenario with full interleave-groups (no gaps) as well as both 1075; load and store groups. We check that when masked-interleave-group is disabled 1076; the predicated loads (and stores) are not vectorized as an 1077; interleaved-group but rather as four separate scalarized accesses. 1078; (For SKX, gather/scatter is not supported by the compiler for chars, therefore 1079; the only remaining alternative is to scalarize). 1080; When masked-interleave-group is enabled we expect to find the proper mask 1081; shuffling code, feeding the wide masked load/store for the two interleave- 1082; groups. 1083; 1084; void masked_strided2(const unsigned char* restrict p, 1085; unsigned char* restrict q, 1086; unsigned char guard) { 1087; for(ix=0; ix < 1024; ++ix) { 1088; if (ix > guard) { 1089; char left = p[2*ix]; 1090; char right = p[2*ix + 1]; 1091; char max = max(left, right); 1092; q[2*ix] = max; 1093; q[2*ix+1] = 0 - max; 1094; } 1095; } 1096;} 1097; 1098define dso_local void @masked_strided2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr { 1099; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2( 1100; DISABLED_MASKED_STRIDED-NEXT: entry: 1101; DISABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 1102; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 1103; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1104; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1105; DISABLED_MASKED_STRIDED: vector.body: 1106; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] 1107; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] 1108; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 1109; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1110; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 1111; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1112; DISABLED_MASKED_STRIDED: pred.load.if: 1113; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 1114; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] 1115; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 1116; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0 1117; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 1118; DISABLED_MASKED_STRIDED: pred.load.continue: 1119; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 1120; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 1121; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1122; DISABLED_MASKED_STRIDED: pred.load.if1: 1123; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 1124; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 1125; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1 1126; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1 1127; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1128; DISABLED_MASKED_STRIDED: pred.load.continue2: 1129; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 1130; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 1131; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1132; DISABLED_MASKED_STRIDED: pred.load.if3: 1133; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 1134; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 1135; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1 1136; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2 1137; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1138; DISABLED_MASKED_STRIDED: pred.load.continue4: 1139; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 1140; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 1141; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 1142; DISABLED_MASKED_STRIDED: pred.load.if5: 1143; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 1144; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]] 1145; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 1146; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3 1147; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1148; DISABLED_MASKED_STRIDED: pred.load.continue6: 1149; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 1150; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 1151; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 1152; DISABLED_MASKED_STRIDED: pred.load.if7: 1153; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 1154; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]] 1155; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1 1156; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4 1157; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 1158; DISABLED_MASKED_STRIDED: pred.load.continue8: 1159; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] 1160; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 1161; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 1162; DISABLED_MASKED_STRIDED: pred.load.if9: 1163; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 1164; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]] 1165; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1 1166; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5 1167; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 1168; DISABLED_MASKED_STRIDED: pred.load.continue10: 1169; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] 1170; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 1171; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 1172; DISABLED_MASKED_STRIDED: pred.load.if11: 1173; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 1174; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]] 1175; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1 1176; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6 1177; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 1178; DISABLED_MASKED_STRIDED: pred.load.continue12: 1179; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 1180; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 1181; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] 1182; DISABLED_MASKED_STRIDED: pred.load.if13: 1183; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 1184; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]] 1185; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 1186; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7 1187; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 1188; DISABLED_MASKED_STRIDED: pred.load.continue14: 1189; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] 1190; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1191; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 1192; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] 1193; DISABLED_MASKED_STRIDED: pred.load.if15: 1194; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0 1195; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]] 1196; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1 1197; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0 1198; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] 1199; DISABLED_MASKED_STRIDED: pred.load.continue16: 1200; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ] 1201; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 1202; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] 1203; DISABLED_MASKED_STRIDED: pred.load.if17: 1204; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1 1205; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]] 1206; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1 1207; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i32 1 1208; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] 1209; DISABLED_MASKED_STRIDED: pred.load.continue18: 1210; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ] 1211; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 1212; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] 1213; DISABLED_MASKED_STRIDED: pred.load.if19: 1214; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2 1215; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]] 1216; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1 1217; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i32 2 1218; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] 1219; DISABLED_MASKED_STRIDED: pred.load.continue20: 1220; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ] 1221; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 1222; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] 1223; DISABLED_MASKED_STRIDED: pred.load.if21: 1224; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3 1225; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]] 1226; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1 1227; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i32 3 1228; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] 1229; DISABLED_MASKED_STRIDED: pred.load.continue22: 1230; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ] 1231; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 1232; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] 1233; DISABLED_MASKED_STRIDED: pred.load.if23: 1234; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4 1235; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]] 1236; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1 1237; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i32 4 1238; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] 1239; DISABLED_MASKED_STRIDED: pred.load.continue24: 1240; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ] 1241; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 1242; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] 1243; DISABLED_MASKED_STRIDED: pred.load.if25: 1244; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5 1245; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]] 1246; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1 1247; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i32 5 1248; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] 1249; DISABLED_MASKED_STRIDED: pred.load.continue26: 1250; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ] 1251; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 1252; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] 1253; DISABLED_MASKED_STRIDED: pred.load.if27: 1254; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6 1255; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]] 1256; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1 1257; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i32 6 1258; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] 1259; DISABLED_MASKED_STRIDED: pred.load.continue28: 1260; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ] 1261; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 1262; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] 1263; DISABLED_MASKED_STRIDED: pred.load.if29: 1264; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7 1265; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]] 1266; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1 1267; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i32 7 1268; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] 1269; DISABLED_MASKED_STRIDED: pred.load.continue30: 1270; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] 1271; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]] 1272; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]] 1273; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 1274; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1275; DISABLED_MASKED_STRIDED: pred.store.if: 1276; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 1277; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]] 1278; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i32 0 1279; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP104]], i8* [[TMP103]], align 1 1280; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] 1281; DISABLED_MASKED_STRIDED: pred.store.continue: 1282; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 1283; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] 1284; DISABLED_MASKED_STRIDED: pred.store.if31: 1285; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 1286; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] 1287; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i32 1 1288; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 1289; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] 1290; DISABLED_MASKED_STRIDED: pred.store.continue32: 1291; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 1292; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] 1293; DISABLED_MASKED_STRIDED: pred.store.if33: 1294; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 1295; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] 1296; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i32 2 1297; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 1298; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] 1299; DISABLED_MASKED_STRIDED: pred.store.continue34: 1300; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 1301; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] 1302; DISABLED_MASKED_STRIDED: pred.store.if35: 1303; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 1304; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] 1305; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i32 3 1306; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 1307; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] 1308; DISABLED_MASKED_STRIDED: pred.store.continue36: 1309; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 1310; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] 1311; DISABLED_MASKED_STRIDED: pred.store.if37: 1312; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 1313; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] 1314; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i32 4 1315; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 1316; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] 1317; DISABLED_MASKED_STRIDED: pred.store.continue38: 1318; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 1319; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] 1320; DISABLED_MASKED_STRIDED: pred.store.if39: 1321; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 1322; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] 1323; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i32 5 1324; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 1325; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] 1326; DISABLED_MASKED_STRIDED: pred.store.continue40: 1327; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 1328; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] 1329; DISABLED_MASKED_STRIDED: pred.store.if41: 1330; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 1331; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] 1332; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i32 6 1333; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 1334; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] 1335; DISABLED_MASKED_STRIDED: pred.store.continue42: 1336; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 1337; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] 1338; DISABLED_MASKED_STRIDED: pred.store.if43: 1339; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 1340; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] 1341; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i32 7 1342; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 1343; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] 1344; DISABLED_MASKED_STRIDED: pred.store.continue44: 1345; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]] 1346; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 1347; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] 1348; DISABLED_MASKED_STRIDED: pred.store.if45: 1349; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0 1350; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] 1351; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i32 0 1352; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 1353; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] 1354; DISABLED_MASKED_STRIDED: pred.store.continue46: 1355; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 1356; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] 1357; DISABLED_MASKED_STRIDED: pred.store.if47: 1358; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1 1359; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] 1360; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i32 1 1361; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 1362; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] 1363; DISABLED_MASKED_STRIDED: pred.store.continue48: 1364; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 1365; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] 1366; DISABLED_MASKED_STRIDED: pred.store.if49: 1367; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2 1368; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] 1369; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i32 2 1370; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 1371; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] 1372; DISABLED_MASKED_STRIDED: pred.store.continue50: 1373; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 1374; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] 1375; DISABLED_MASKED_STRIDED: pred.store.if51: 1376; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3 1377; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] 1378; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i32 3 1379; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 1380; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] 1381; DISABLED_MASKED_STRIDED: pred.store.continue52: 1382; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 1383; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] 1384; DISABLED_MASKED_STRIDED: pred.store.if53: 1385; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4 1386; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] 1387; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i32 4 1388; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 1389; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] 1390; DISABLED_MASKED_STRIDED: pred.store.continue54: 1391; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 1392; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] 1393; DISABLED_MASKED_STRIDED: pred.store.if55: 1394; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5 1395; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] 1396; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i32 5 1397; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 1398; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] 1399; DISABLED_MASKED_STRIDED: pred.store.continue56: 1400; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 1401; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] 1402; DISABLED_MASKED_STRIDED: pred.store.if57: 1403; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6 1404; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] 1405; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i32 6 1406; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 1407; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] 1408; DISABLED_MASKED_STRIDED: pred.store.continue58: 1409; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 1410; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] 1411; DISABLED_MASKED_STRIDED: pred.store.if59: 1412; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7 1413; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] 1414; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i32 7 1415; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 1416; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] 1417; DISABLED_MASKED_STRIDED: pred.store.continue60: 1418; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1419; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1420; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 1421; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !7 1422; DISABLED_MASKED_STRIDED: for.end: 1423; DISABLED_MASKED_STRIDED-NEXT: ret void 1424; 1425; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2( 1426; ENABLED_MASKED_STRIDED-NEXT: entry: 1427; ENABLED_MASKED_STRIDED-NEXT: [[CONV:%.*]] = zext i8 [[GUARD:%.*]] to i32 1428; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0 1429; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1430; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1431; ENABLED_MASKED_STRIDED: vector.body: 1432; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1433; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1434; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 1435; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 1436; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 1437; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* 1438; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 1439; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) 1440; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1441; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1442; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 1443; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC1]] 1444; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC1]], <8 x i8> [[STRIDED_VEC]] 1445; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] 1446; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 1447; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] 1448; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* 1449; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1450; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) 1451; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1452; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1453; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 1454; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop !9 1455; ENABLED_MASKED_STRIDED: for.end: 1456; ENABLED_MASKED_STRIDED-NEXT: ret void 1457; 1458entry: 1459 %conv = zext i8 %guard to i32 1460 br label %for.body 1461 1462for.body: 1463 %ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] 1464 %cmp1 = icmp ugt i32 %ix.024, %conv 1465 br i1 %cmp1, label %if.then, label %for.inc 1466 1467if.then: 1468 %mul = shl nuw nsw i32 %ix.024, 1 1469 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 1470 %0 = load i8, i8* %arrayidx, align 1 1471 %add = or i32 %mul, 1 1472 %arrayidx4 = getelementptr inbounds i8, i8* %p, i32 %add 1473 %1 = load i8, i8* %arrayidx4, align 1 1474 %cmp.i = icmp slt i8 %0, %1 1475 %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0 1476 %arrayidx6 = getelementptr inbounds i8, i8* %q, i32 %mul 1477 store i8 %spec.select.i, i8* %arrayidx6, align 1 1478 %sub = sub i8 0, %spec.select.i 1479 %arrayidx11 = getelementptr inbounds i8, i8* %q, i32 %add 1480 store i8 %sub, i8* %arrayidx11, align 1 1481 br label %for.inc 1482 1483for.inc: 1484 %inc = add nuw nsw i32 %ix.024, 1 1485 %exitcond = icmp eq i32 %inc, 1024 1486 br i1 %exitcond, label %for.end, label %for.body 1487 1488for.end: 1489 ret void 1490} 1491 1492; Full groups again, this time checking an Optsize scenario, with unknown trip- 1493; count, to check the behavior of folding-the-tail (folding the remainder loop 1494; into the main loop using masking) together with interleaved-groups. 1495; When masked-interleave-group is disabled the interleave-groups will be 1496; invalidated during Legality check, so nothing to check here. 1497; When masked-interleave-group is enabled we check that there is no epilogue, 1498; and that the interleave-groups are vectorized using proper masking (with 1499; shuffling of the mask feeding the wide masked load/store). 1500; The mask itself is an And of two masks: one that masks away the remainder 1501; iterations, and one that masks away the 'else' of the 'if' statement. 1502; 1503; void masked_strided2_unknown_tc(const unsigned char* restrict p, 1504; unsigned char* restrict q, 1505; unsigned char guard, 1506; int n) { 1507; for(ix=0; ix < n; ++ix) { 1508; if (ix > guard) { 1509; char left = p[2*ix]; 1510; char right = p[2*ix + 1]; 1511; char max = max(left, right); 1512; q[2*ix] = max; 1513; q[2*ix+1] = 0 - max; 1514; } 1515; } 1516;} 1517; 1518define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %guard, i32 %n) local_unnamed_addr optsize { 1519; DISABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc( 1520; DISABLED_MASKED_STRIDED-NEXT: entry: 1521; DISABLED_MASKED_STRIDED-NEXT: [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0 1522; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 1523; DISABLED_MASKED_STRIDED: vector.ph: 1524; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 1525; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 1526; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 1527; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 1528; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1529; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0 1530; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 1531; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1532; DISABLED_MASKED_STRIDED: vector.body: 1533; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE62:%.*]] ] 1534; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE62]] ] 1535; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 1536; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 1537; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1538; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 1539; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 1540; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1541; DISABLED_MASKED_STRIDED: pred.load.if: 1542; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 1543; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP5]] 1544; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 1 1545; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> undef, i8 [[TMP7]], i32 0 1546; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 1547; DISABLED_MASKED_STRIDED: pred.load.continue: 1548; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] 1549; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 1550; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1551; DISABLED_MASKED_STRIDED: pred.load.if3: 1552; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 1553; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP11]] 1554; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 1555; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i32 1 1556; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1557; DISABLED_MASKED_STRIDED: pred.load.continue4: 1558; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = phi <8 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] 1559; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 1560; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 1561; DISABLED_MASKED_STRIDED: pred.load.if5: 1562; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 1563; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP17]] 1564; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 1 1565; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i32 2 1566; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1567; DISABLED_MASKED_STRIDED: pred.load.continue6: 1568; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = phi <8 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP20]], [[PRED_LOAD_IF5]] ] 1569; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 1570; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 1571; DISABLED_MASKED_STRIDED: pred.load.if7: 1572; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 1573; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP23]] 1574; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = load i8, i8* [[TMP24]], align 1 1575; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i32 3 1576; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 1577; DISABLED_MASKED_STRIDED: pred.load.continue8: 1578; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = phi <8 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP26]], [[PRED_LOAD_IF7]] ] 1579; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 1580; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 1581; DISABLED_MASKED_STRIDED: pred.load.if9: 1582; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 1583; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP29]] 1584; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = load i8, i8* [[TMP30]], align 1 1585; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i32 4 1586; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 1587; DISABLED_MASKED_STRIDED: pred.load.continue10: 1588; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = phi <8 x i8> [ [[TMP27]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ] 1589; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 1590; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 1591; DISABLED_MASKED_STRIDED: pred.load.if11: 1592; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 1593; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP35]] 1594; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = load i8, i8* [[TMP36]], align 1 1595; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i32 5 1596; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 1597; DISABLED_MASKED_STRIDED: pred.load.continue12: 1598; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = phi <8 x i8> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP38]], [[PRED_LOAD_IF11]] ] 1599; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 1600; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] 1601; DISABLED_MASKED_STRIDED: pred.load.if13: 1602; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 1603; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP41]] 1604; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = load i8, i8* [[TMP42]], align 1 1605; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i32 6 1606; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 1607; DISABLED_MASKED_STRIDED: pred.load.continue14: 1608; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = phi <8 x i8> [ [[TMP39]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP44]], [[PRED_LOAD_IF13]] ] 1609; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 1610; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] 1611; DISABLED_MASKED_STRIDED: pred.load.if15: 1612; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 1613; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP47]] 1614; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = load i8, i8* [[TMP48]], align 1 1615; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i32 7 1616; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] 1617; DISABLED_MASKED_STRIDED: pred.load.continue16: 1618; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] 1619; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = or <8 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1620; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 1621; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] 1622; DISABLED_MASKED_STRIDED: pred.load.if17: 1623; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i32 0 1624; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP54]] 1625; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = load i8, i8* [[TMP55]], align 1 1626; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = insertelement <8 x i8> undef, i8 [[TMP56]], i32 0 1627; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] 1628; DISABLED_MASKED_STRIDED: pred.load.continue18: 1629; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE16]] ], [ [[TMP57]], [[PRED_LOAD_IF17]] ] 1630; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 1631; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] 1632; DISABLED_MASKED_STRIDED: pred.load.if19: 1633; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = extractelement <8 x i32> [[TMP52]], i32 1 1634; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP60]] 1635; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = load i8, i8* [[TMP61]], align 1 1636; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP62]], i32 1 1637; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] 1638; DISABLED_MASKED_STRIDED: pred.load.continue20: 1639; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = phi <8 x i8> [ [[TMP58]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP63]], [[PRED_LOAD_IF19]] ] 1640; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 1641; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP65]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] 1642; DISABLED_MASKED_STRIDED: pred.load.if21: 1643; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = extractelement <8 x i32> [[TMP52]], i32 2 1644; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP66]] 1645; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = load i8, i8* [[TMP67]], align 1 1646; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP68]], i32 2 1647; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] 1648; DISABLED_MASKED_STRIDED: pred.load.continue22: 1649; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = phi <8 x i8> [ [[TMP64]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP69]], [[PRED_LOAD_IF21]] ] 1650; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 1651; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP71]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] 1652; DISABLED_MASKED_STRIDED: pred.load.if23: 1653; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = extractelement <8 x i32> [[TMP52]], i32 3 1654; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP72]] 1655; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = load i8, i8* [[TMP73]], align 1 1656; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = insertelement <8 x i8> [[TMP70]], i8 [[TMP74]], i32 3 1657; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] 1658; DISABLED_MASKED_STRIDED: pred.load.continue24: 1659; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = phi <8 x i8> [ [[TMP70]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP75]], [[PRED_LOAD_IF23]] ] 1660; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 1661; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP77]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] 1662; DISABLED_MASKED_STRIDED: pred.load.if25: 1663; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = extractelement <8 x i32> [[TMP52]], i32 4 1664; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP78]] 1665; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = load i8, i8* [[TMP79]], align 1 1666; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = insertelement <8 x i8> [[TMP76]], i8 [[TMP80]], i32 4 1667; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] 1668; DISABLED_MASKED_STRIDED: pred.load.continue26: 1669; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = phi <8 x i8> [ [[TMP76]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP81]], [[PRED_LOAD_IF25]] ] 1670; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 1671; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP83]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] 1672; DISABLED_MASKED_STRIDED: pred.load.if27: 1673; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = extractelement <8 x i32> [[TMP52]], i32 5 1674; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP84]] 1675; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = load i8, i8* [[TMP85]], align 1 1676; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = insertelement <8 x i8> [[TMP82]], i8 [[TMP86]], i32 5 1677; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] 1678; DISABLED_MASKED_STRIDED: pred.load.continue28: 1679; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = phi <8 x i8> [ [[TMP82]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP87]], [[PRED_LOAD_IF27]] ] 1680; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 1681; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] 1682; DISABLED_MASKED_STRIDED: pred.load.if29: 1683; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = extractelement <8 x i32> [[TMP52]], i32 6 1684; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP90]] 1685; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = load i8, i8* [[TMP91]], align 1 1686; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = insertelement <8 x i8> [[TMP88]], i8 [[TMP92]], i32 6 1687; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] 1688; DISABLED_MASKED_STRIDED: pred.load.continue30: 1689; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = phi <8 x i8> [ [[TMP88]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP93]], [[PRED_LOAD_IF29]] ] 1690; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 1691; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP95]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]] 1692; DISABLED_MASKED_STRIDED: pred.load.if31: 1693; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = extractelement <8 x i32> [[TMP52]], i32 7 1694; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP96]] 1695; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = load i8, i8* [[TMP97]], align 1 1696; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = insertelement <8 x i8> [[TMP94]], i8 [[TMP98]], i32 7 1697; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE32]] 1698; DISABLED_MASKED_STRIDED: pred.load.continue32: 1699; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = phi <8 x i8> [ [[TMP94]], [[PRED_LOAD_CONTINUE30]] ], [ [[TMP99]], [[PRED_LOAD_IF31]] ] 1700; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = icmp slt <8 x i8> [[TMP51]], [[TMP100]] 1701; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = select <8 x i1> [[TMP101]], <8 x i8> [[TMP100]], <8 x i8> [[TMP51]] 1702; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 1703; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP103]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1704; DISABLED_MASKED_STRIDED: pred.store.if: 1705; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 1706; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP104]] 1707; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i8> [[TMP102]], i32 0 1708; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP106]], i8* [[TMP105]], align 1 1709; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] 1710; DISABLED_MASKED_STRIDED: pred.store.continue: 1711; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 1712; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP107]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] 1713; DISABLED_MASKED_STRIDED: pred.store.if33: 1714; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 1715; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP108]] 1716; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i8> [[TMP102]], i32 1 1717; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP110]], i8* [[TMP109]], align 1 1718; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] 1719; DISABLED_MASKED_STRIDED: pred.store.continue34: 1720; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 1721; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP111]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] 1722; DISABLED_MASKED_STRIDED: pred.store.if35: 1723; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 1724; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP112]] 1725; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i8> [[TMP102]], i32 2 1726; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP114]], i8* [[TMP113]], align 1 1727; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] 1728; DISABLED_MASKED_STRIDED: pred.store.continue36: 1729; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 1730; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP115]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] 1731; DISABLED_MASKED_STRIDED: pred.store.if37: 1732; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 1733; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP116]] 1734; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i8> [[TMP102]], i32 3 1735; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP118]], i8* [[TMP117]], align 1 1736; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] 1737; DISABLED_MASKED_STRIDED: pred.store.continue38: 1738; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 1739; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP119]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] 1740; DISABLED_MASKED_STRIDED: pred.store.if39: 1741; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 1742; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP120]] 1743; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i8> [[TMP102]], i32 4 1744; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP122]], i8* [[TMP121]], align 1 1745; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] 1746; DISABLED_MASKED_STRIDED: pred.store.continue40: 1747; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 1748; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP123]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] 1749; DISABLED_MASKED_STRIDED: pred.store.if41: 1750; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 1751; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP124]] 1752; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i8> [[TMP102]], i32 5 1753; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP126]], i8* [[TMP125]], align 1 1754; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] 1755; DISABLED_MASKED_STRIDED: pred.store.continue42: 1756; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 1757; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP127]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] 1758; DISABLED_MASKED_STRIDED: pred.store.if43: 1759; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 1760; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP128]] 1761; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i8> [[TMP102]], i32 6 1762; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP130]], i8* [[TMP129]], align 1 1763; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] 1764; DISABLED_MASKED_STRIDED: pred.store.continue44: 1765; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 1766; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP131]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] 1767; DISABLED_MASKED_STRIDED: pred.store.if45: 1768; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 1769; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP132]] 1770; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i8> [[TMP102]], i32 7 1771; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP134]], i8* [[TMP133]], align 1 1772; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] 1773; DISABLED_MASKED_STRIDED: pred.store.continue46: 1774; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = sub <8 x i8> zeroinitializer, [[TMP102]] 1775; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 1776; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP136]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] 1777; DISABLED_MASKED_STRIDED: pred.store.if47: 1778; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i32> [[TMP52]], i32 0 1779; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP137]] 1780; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i8> [[TMP135]], i32 0 1781; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP139]], i8* [[TMP138]], align 1 1782; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] 1783; DISABLED_MASKED_STRIDED: pred.store.continue48: 1784; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1 1785; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP140]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] 1786; DISABLED_MASKED_STRIDED: pred.store.if49: 1787; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i32> [[TMP52]], i32 1 1788; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP141]] 1789; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i8> [[TMP135]], i32 1 1790; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP143]], i8* [[TMP142]], align 1 1791; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] 1792; DISABLED_MASKED_STRIDED: pred.store.continue50: 1793; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2 1794; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP144]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] 1795; DISABLED_MASKED_STRIDED: pred.store.if51: 1796; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i32> [[TMP52]], i32 2 1797; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP145]] 1798; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i8> [[TMP135]], i32 2 1799; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP147]], i8* [[TMP146]], align 1 1800; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] 1801; DISABLED_MASKED_STRIDED: pred.store.continue52: 1802; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3 1803; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP148]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] 1804; DISABLED_MASKED_STRIDED: pred.store.if53: 1805; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i32> [[TMP52]], i32 3 1806; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP149]] 1807; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i8> [[TMP135]], i32 3 1808; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP151]], i8* [[TMP150]], align 1 1809; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] 1810; DISABLED_MASKED_STRIDED: pred.store.continue54: 1811; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4 1812; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP152]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] 1813; DISABLED_MASKED_STRIDED: pred.store.if55: 1814; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i32> [[TMP52]], i32 4 1815; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP153]] 1816; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i8> [[TMP135]], i32 4 1817; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP155]], i8* [[TMP154]], align 1 1818; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] 1819; DISABLED_MASKED_STRIDED: pred.store.continue56: 1820; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5 1821; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP156]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] 1822; DISABLED_MASKED_STRIDED: pred.store.if57: 1823; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i32> [[TMP52]], i32 5 1824; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP157]] 1825; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i8> [[TMP135]], i32 5 1826; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP159]], i8* [[TMP158]], align 1 1827; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] 1828; DISABLED_MASKED_STRIDED: pred.store.continue58: 1829; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6 1830; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP160]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] 1831; DISABLED_MASKED_STRIDED: pred.store.if59: 1832; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i32> [[TMP52]], i32 6 1833; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP161]] 1834; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i8> [[TMP135]], i32 6 1835; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP163]], i8* [[TMP162]], align 1 1836; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] 1837; DISABLED_MASKED_STRIDED: pred.store.continue60: 1838; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7 1839; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP164]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]] 1840; DISABLED_MASKED_STRIDED: pred.store.if61: 1841; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i32> [[TMP52]], i32 7 1842; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP165]] 1843; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = extractelement <8 x i8> [[TMP135]], i32 7 1844; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP167]], i8* [[TMP166]], align 1 1845; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] 1846; DISABLED_MASKED_STRIDED: pred.store.continue62: 1847; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1848; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1849; DISABLED_MASKED_STRIDED-NEXT: [[TMP168:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 1850; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP168]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !8 1851; DISABLED_MASKED_STRIDED: for.end: 1852; DISABLED_MASKED_STRIDED-NEXT: ret void 1853; 1854; ENABLED_MASKED_STRIDED-LABEL: @masked_strided2_unknown_tc( 1855; ENABLED_MASKED_STRIDED-NEXT: entry: 1856; ENABLED_MASKED_STRIDED-NEXT: [[CMP22:%.*]] = icmp sgt i32 [[N:%.*]], 0 1857; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP22]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 1858; ENABLED_MASKED_STRIDED: vector.ph: 1859; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 1860; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 1861; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 1862; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 1863; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1864; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[GUARD:%.*]], i32 0 1865; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 1866; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1867; ENABLED_MASKED_STRIDED: vector.body: 1868; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1869; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1870; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] 1871; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 1872; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1 1873; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]] 1874; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]] 1875; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>* 1876; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 1877; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) 1878; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1879; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1880; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], 1 1881; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] 1882; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] 1883; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = sub <8 x i8> zeroinitializer, [[TMP8]] 1884; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 1885; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP10]], i32 [[TMP6]] 1886; ENABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <16 x i8>* 1887; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> [[TMP9]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1888; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP12]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) 1889; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 1890; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 1891; ENABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 1892; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP13]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !10 1893; ENABLED_MASKED_STRIDED: for.end: 1894; ENABLED_MASKED_STRIDED-NEXT: ret void 1895; 1896entry: 1897 %cmp22 = icmp sgt i32 %n, 0 1898 br i1 %cmp22, label %for.body.preheader, label %for.end 1899 1900for.body.preheader: 1901 br label %for.body 1902 1903for.body: 1904 %ix.023 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ] 1905 %cmp1 = icmp sgt i32 %ix.023, %guard 1906 br i1 %cmp1, label %if.then, label %for.inc 1907 1908if.then: 1909 %mul = shl nuw nsw i32 %ix.023, 1 1910 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 1911 %0 = load i8, i8* %arrayidx, align 1 1912 %add = or i32 %mul, 1 1913 %arrayidx3 = getelementptr inbounds i8, i8* %p, i32 %add 1914 %1 = load i8, i8* %arrayidx3, align 1 1915 %cmp.i = icmp slt i8 %0, %1 1916 %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0 1917 %arrayidx5 = getelementptr inbounds i8, i8* %q, i32 %mul 1918 store i8 %spec.select.i, i8* %arrayidx5, align 1 1919 %sub = sub i8 0, %spec.select.i 1920 %arrayidx9 = getelementptr inbounds i8, i8* %q, i32 %add 1921 store i8 %sub, i8* %arrayidx9, align 1 1922 br label %for.inc 1923 1924for.inc: 1925 %inc = add nuw nsw i32 %ix.023, 1 1926 %exitcond = icmp eq i32 %inc, %n 1927 br i1 %exitcond, label %for.end.loopexit, label %for.body 1928 1929for.end.loopexit: 1930 br label %for.end 1931 1932for.end: 1933 ret void 1934} 1935 1936; Full groups under Optsize scenario again, with unknown trip-count, again in 1937; order to check the behavior of folding-the-tail (folding the remainder loop 1938; into the main loop using masking) together with interleaved-groups. 1939; This time the accesses are not conditional, they become conditional only 1940; due to tail folding. 1941; When masked-interleave-group is disabled the interleave-groups will be 1942; invalidated during cost-model checks, so we check for no epilogue and 1943; scalarized conditional accesses. 1944; When masked-interleave-group is enabled we check for no epilogue, 1945; and interleave-groups vectorized using proper masking (with 1946; shuffling of the mask feeding the wide masked load/store). 1947; (Same vectorization scheme as for the previous loop with conditional accesses 1948; except here the mask only masks away the remainder iterations.) 1949; 1950; void unconditional_masked_strided2_unknown_tc(const unsigned char* restrict p, 1951; unsigned char* restrict q, 1952; int n) { 1953; for(ix=0; ix < n; ++ix) { 1954; char left = p[2*ix]; 1955; char right = p[2*ix + 1]; 1956; char max = max(left, right); 1957; q[2*ix] = max; 1958; q[2*ix+1] = 0 - max; 1959; } 1960;} 1961; 1962define dso_local void @unconditional_masked_strided2_unknown_tc(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %n) local_unnamed_addr optsize { 1963; DISABLED_MASKED_STRIDED-LABEL: @unconditional_masked_strided2_unknown_tc( 1964; DISABLED_MASKED_STRIDED-NEXT: entry: 1965; DISABLED_MASKED_STRIDED-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[N:%.*]], 0 1966; DISABLED_MASKED_STRIDED-NEXT: br i1 [[CMP20]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 1967; DISABLED_MASKED_STRIDED: vector.ph: 1968; DISABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 1969; DISABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 1970; DISABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 1971; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 1972; DISABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 1973; DISABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 1974; DISABLED_MASKED_STRIDED: vector.body: 1975; DISABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ] 1976; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ] 1977; DISABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 1978; DISABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1979; DISABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 1980; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1981; DISABLED_MASKED_STRIDED: pred.load.if: 1982; DISABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 1983; DISABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP3]] 1984; DISABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 1985; DISABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> undef, i8 [[TMP5]], i32 0 1986; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE]] 1987; DISABLED_MASKED_STRIDED: pred.load.continue: 1988; DISABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = phi <8 x i8> [ undef, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 1989; DISABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 1990; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1991; DISABLED_MASKED_STRIDED: pred.load.if1: 1992; DISABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 1993; DISABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP9]] 1994; DISABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 1 1995; DISABLED_MASKED_STRIDED-NEXT: [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i32 1 1996; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1997; DISABLED_MASKED_STRIDED: pred.load.continue2: 1998; DISABLED_MASKED_STRIDED-NEXT: [[TMP13:%.*]] = phi <8 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 1999; DISABLED_MASKED_STRIDED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 2000; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 2001; DISABLED_MASKED_STRIDED: pred.load.if3: 2002; DISABLED_MASKED_STRIDED-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 2003; DISABLED_MASKED_STRIDED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP15]] 2004; DISABLED_MASKED_STRIDED-NEXT: [[TMP17:%.*]] = load i8, i8* [[TMP16]], align 1 2005; DISABLED_MASKED_STRIDED-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i32 2 2006; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE4]] 2007; DISABLED_MASKED_STRIDED: pred.load.continue4: 2008; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = phi <8 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 2009; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 2010; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 2011; DISABLED_MASKED_STRIDED: pred.load.if5: 2012; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 2013; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP21]] 2014; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 2015; DISABLED_MASKED_STRIDED-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i32 3 2016; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE6]] 2017; DISABLED_MASKED_STRIDED: pred.load.continue6: 2018; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = phi <8 x i8> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 2019; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 2020; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 2021; DISABLED_MASKED_STRIDED: pred.load.if7: 2022; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 2023; DISABLED_MASKED_STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP27]] 2024; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP28]], align 1 2025; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i32 4 2026; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE8]] 2027; DISABLED_MASKED_STRIDED: pred.load.continue8: 2028; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = phi <8 x i8> [ [[TMP25]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP30]], [[PRED_LOAD_IF7]] ] 2029; DISABLED_MASKED_STRIDED-NEXT: [[TMP32:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 2030; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] 2031; DISABLED_MASKED_STRIDED: pred.load.if9: 2032; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 2033; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP33]] 2034; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = load i8, i8* [[TMP34]], align 1 2035; DISABLED_MASKED_STRIDED-NEXT: [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i32 5 2036; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE10]] 2037; DISABLED_MASKED_STRIDED: pred.load.continue10: 2038; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = phi <8 x i8> [ [[TMP31]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] 2039; DISABLED_MASKED_STRIDED-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 2040; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 2041; DISABLED_MASKED_STRIDED: pred.load.if11: 2042; DISABLED_MASKED_STRIDED-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 2043; DISABLED_MASKED_STRIDED-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP39]] 2044; DISABLED_MASKED_STRIDED-NEXT: [[TMP41:%.*]] = load i8, i8* [[TMP40]], align 1 2045; DISABLED_MASKED_STRIDED-NEXT: [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i32 6 2046; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE12]] 2047; DISABLED_MASKED_STRIDED: pred.load.continue12: 2048; DISABLED_MASKED_STRIDED-NEXT: [[TMP43:%.*]] = phi <8 x i8> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 2049; DISABLED_MASKED_STRIDED-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 2050; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]] 2051; DISABLED_MASKED_STRIDED: pred.load.if13: 2052; DISABLED_MASKED_STRIDED-NEXT: [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 2053; DISABLED_MASKED_STRIDED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP45]] 2054; DISABLED_MASKED_STRIDED-NEXT: [[TMP47:%.*]] = load i8, i8* [[TMP46]], align 1 2055; DISABLED_MASKED_STRIDED-NEXT: [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i32 7 2056; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE14]] 2057; DISABLED_MASKED_STRIDED: pred.load.continue14: 2058; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] 2059; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = or <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 2060; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 2061; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]] 2062; DISABLED_MASKED_STRIDED: pred.load.if15: 2063; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0 2064; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP52]] 2065; DISABLED_MASKED_STRIDED-NEXT: [[TMP54:%.*]] = load i8, i8* [[TMP53]], align 1 2066; DISABLED_MASKED_STRIDED-NEXT: [[TMP55:%.*]] = insertelement <8 x i8> undef, i8 [[TMP54]], i32 0 2067; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE16]] 2068; DISABLED_MASKED_STRIDED: pred.load.continue16: 2069; DISABLED_MASKED_STRIDED-NEXT: [[TMP56:%.*]] = phi <8 x i8> [ undef, [[PRED_LOAD_CONTINUE14]] ], [ [[TMP55]], [[PRED_LOAD_IF15]] ] 2070; DISABLED_MASKED_STRIDED-NEXT: [[TMP57:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 2071; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]] 2072; DISABLED_MASKED_STRIDED: pred.load.if17: 2073; DISABLED_MASKED_STRIDED-NEXT: [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1 2074; DISABLED_MASKED_STRIDED-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP58]] 2075; DISABLED_MASKED_STRIDED-NEXT: [[TMP60:%.*]] = load i8, i8* [[TMP59]], align 1 2076; DISABLED_MASKED_STRIDED-NEXT: [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i32 1 2077; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE18]] 2078; DISABLED_MASKED_STRIDED: pred.load.continue18: 2079; DISABLED_MASKED_STRIDED-NEXT: [[TMP62:%.*]] = phi <8 x i8> [ [[TMP56]], [[PRED_LOAD_CONTINUE16]] ], [ [[TMP61]], [[PRED_LOAD_IF17]] ] 2080; DISABLED_MASKED_STRIDED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 2081; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]] 2082; DISABLED_MASKED_STRIDED: pred.load.if19: 2083; DISABLED_MASKED_STRIDED-NEXT: [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2 2084; DISABLED_MASKED_STRIDED-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP64]] 2085; DISABLED_MASKED_STRIDED-NEXT: [[TMP66:%.*]] = load i8, i8* [[TMP65]], align 1 2086; DISABLED_MASKED_STRIDED-NEXT: [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i32 2 2087; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE20]] 2088; DISABLED_MASKED_STRIDED: pred.load.continue20: 2089; DISABLED_MASKED_STRIDED-NEXT: [[TMP68:%.*]] = phi <8 x i8> [ [[TMP62]], [[PRED_LOAD_CONTINUE18]] ], [ [[TMP67]], [[PRED_LOAD_IF19]] ] 2090; DISABLED_MASKED_STRIDED-NEXT: [[TMP69:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 2091; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]] 2092; DISABLED_MASKED_STRIDED: pred.load.if21: 2093; DISABLED_MASKED_STRIDED-NEXT: [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3 2094; DISABLED_MASKED_STRIDED-NEXT: [[TMP71:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP70]] 2095; DISABLED_MASKED_STRIDED-NEXT: [[TMP72:%.*]] = load i8, i8* [[TMP71]], align 1 2096; DISABLED_MASKED_STRIDED-NEXT: [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i32 3 2097; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE22]] 2098; DISABLED_MASKED_STRIDED: pred.load.continue22: 2099; DISABLED_MASKED_STRIDED-NEXT: [[TMP74:%.*]] = phi <8 x i8> [ [[TMP68]], [[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], [[PRED_LOAD_IF21]] ] 2100; DISABLED_MASKED_STRIDED-NEXT: [[TMP75:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 2101; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]] 2102; DISABLED_MASKED_STRIDED: pred.load.if23: 2103; DISABLED_MASKED_STRIDED-NEXT: [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4 2104; DISABLED_MASKED_STRIDED-NEXT: [[TMP77:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP76]] 2105; DISABLED_MASKED_STRIDED-NEXT: [[TMP78:%.*]] = load i8, i8* [[TMP77]], align 1 2106; DISABLED_MASKED_STRIDED-NEXT: [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i32 4 2107; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE24]] 2108; DISABLED_MASKED_STRIDED: pred.load.continue24: 2109; DISABLED_MASKED_STRIDED-NEXT: [[TMP80:%.*]] = phi <8 x i8> [ [[TMP74]], [[PRED_LOAD_CONTINUE22]] ], [ [[TMP79]], [[PRED_LOAD_IF23]] ] 2110; DISABLED_MASKED_STRIDED-NEXT: [[TMP81:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 2111; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]] 2112; DISABLED_MASKED_STRIDED: pred.load.if25: 2113; DISABLED_MASKED_STRIDED-NEXT: [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5 2114; DISABLED_MASKED_STRIDED-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP82]] 2115; DISABLED_MASKED_STRIDED-NEXT: [[TMP84:%.*]] = load i8, i8* [[TMP83]], align 1 2116; DISABLED_MASKED_STRIDED-NEXT: [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i32 5 2117; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE26]] 2118; DISABLED_MASKED_STRIDED: pred.load.continue26: 2119; DISABLED_MASKED_STRIDED-NEXT: [[TMP86:%.*]] = phi <8 x i8> [ [[TMP80]], [[PRED_LOAD_CONTINUE24]] ], [ [[TMP85]], [[PRED_LOAD_IF25]] ] 2120; DISABLED_MASKED_STRIDED-NEXT: [[TMP87:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 2121; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]] 2122; DISABLED_MASKED_STRIDED: pred.load.if27: 2123; DISABLED_MASKED_STRIDED-NEXT: [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6 2124; DISABLED_MASKED_STRIDED-NEXT: [[TMP89:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP88]] 2125; DISABLED_MASKED_STRIDED-NEXT: [[TMP90:%.*]] = load i8, i8* [[TMP89]], align 1 2126; DISABLED_MASKED_STRIDED-NEXT: [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i32 6 2127; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE28]] 2128; DISABLED_MASKED_STRIDED: pred.load.continue28: 2129; DISABLED_MASKED_STRIDED-NEXT: [[TMP92:%.*]] = phi <8 x i8> [ [[TMP86]], [[PRED_LOAD_CONTINUE26]] ], [ [[TMP91]], [[PRED_LOAD_IF27]] ] 2130; DISABLED_MASKED_STRIDED-NEXT: [[TMP93:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 2131; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]] 2132; DISABLED_MASKED_STRIDED: pred.load.if29: 2133; DISABLED_MASKED_STRIDED-NEXT: [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7 2134; DISABLED_MASKED_STRIDED-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 [[TMP94]] 2135; DISABLED_MASKED_STRIDED-NEXT: [[TMP96:%.*]] = load i8, i8* [[TMP95]], align 1 2136; DISABLED_MASKED_STRIDED-NEXT: [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i32 7 2137; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_LOAD_CONTINUE30]] 2138; DISABLED_MASKED_STRIDED: pred.load.continue30: 2139; DISABLED_MASKED_STRIDED-NEXT: [[TMP98:%.*]] = phi <8 x i8> [ [[TMP92]], [[PRED_LOAD_CONTINUE28]] ], [ [[TMP97]], [[PRED_LOAD_IF29]] ] 2140; DISABLED_MASKED_STRIDED-NEXT: [[TMP99:%.*]] = icmp slt <8 x i8> [[TMP49]], [[TMP98]] 2141; DISABLED_MASKED_STRIDED-NEXT: [[TMP100:%.*]] = select <8 x i1> [[TMP99]], <8 x i8> [[TMP98]], <8 x i8> [[TMP49]] 2142; DISABLED_MASKED_STRIDED-NEXT: [[TMP101:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 2143; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP101]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 2144; DISABLED_MASKED_STRIDED: pred.store.if: 2145; DISABLED_MASKED_STRIDED-NEXT: [[TMP102:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 2146; DISABLED_MASKED_STRIDED-NEXT: [[TMP103:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 [[TMP102]] 2147; DISABLED_MASKED_STRIDED-NEXT: [[TMP104:%.*]] = extractelement <8 x i8> [[TMP100]], i32 0 2148; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP104]], i8* [[TMP103]], align 1 2149; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE]] 2150; DISABLED_MASKED_STRIDED: pred.store.continue: 2151; DISABLED_MASKED_STRIDED-NEXT: [[TMP105:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 2152; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP105]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] 2153; DISABLED_MASKED_STRIDED: pred.store.if31: 2154; DISABLED_MASKED_STRIDED-NEXT: [[TMP106:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 2155; DISABLED_MASKED_STRIDED-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP106]] 2156; DISABLED_MASKED_STRIDED-NEXT: [[TMP108:%.*]] = extractelement <8 x i8> [[TMP100]], i32 1 2157; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP108]], i8* [[TMP107]], align 1 2158; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE32]] 2159; DISABLED_MASKED_STRIDED: pred.store.continue32: 2160; DISABLED_MASKED_STRIDED-NEXT: [[TMP109:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 2161; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP109]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]] 2162; DISABLED_MASKED_STRIDED: pred.store.if33: 2163; DISABLED_MASKED_STRIDED-NEXT: [[TMP110:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 2164; DISABLED_MASKED_STRIDED-NEXT: [[TMP111:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP110]] 2165; DISABLED_MASKED_STRIDED-NEXT: [[TMP112:%.*]] = extractelement <8 x i8> [[TMP100]], i32 2 2166; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP112]], i8* [[TMP111]], align 1 2167; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE34]] 2168; DISABLED_MASKED_STRIDED: pred.store.continue34: 2169; DISABLED_MASKED_STRIDED-NEXT: [[TMP113:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 2170; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP113]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]] 2171; DISABLED_MASKED_STRIDED: pred.store.if35: 2172; DISABLED_MASKED_STRIDED-NEXT: [[TMP114:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 2173; DISABLED_MASKED_STRIDED-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP114]] 2174; DISABLED_MASKED_STRIDED-NEXT: [[TMP116:%.*]] = extractelement <8 x i8> [[TMP100]], i32 3 2175; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP116]], i8* [[TMP115]], align 1 2176; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE36]] 2177; DISABLED_MASKED_STRIDED: pred.store.continue36: 2178; DISABLED_MASKED_STRIDED-NEXT: [[TMP117:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 2179; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP117]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]] 2180; DISABLED_MASKED_STRIDED: pred.store.if37: 2181; DISABLED_MASKED_STRIDED-NEXT: [[TMP118:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 2182; DISABLED_MASKED_STRIDED-NEXT: [[TMP119:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP118]] 2183; DISABLED_MASKED_STRIDED-NEXT: [[TMP120:%.*]] = extractelement <8 x i8> [[TMP100]], i32 4 2184; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP120]], i8* [[TMP119]], align 1 2185; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE38]] 2186; DISABLED_MASKED_STRIDED: pred.store.continue38: 2187; DISABLED_MASKED_STRIDED-NEXT: [[TMP121:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 2188; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP121]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]] 2189; DISABLED_MASKED_STRIDED: pred.store.if39: 2190; DISABLED_MASKED_STRIDED-NEXT: [[TMP122:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 2191; DISABLED_MASKED_STRIDED-NEXT: [[TMP123:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP122]] 2192; DISABLED_MASKED_STRIDED-NEXT: [[TMP124:%.*]] = extractelement <8 x i8> [[TMP100]], i32 5 2193; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP124]], i8* [[TMP123]], align 1 2194; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE40]] 2195; DISABLED_MASKED_STRIDED: pred.store.continue40: 2196; DISABLED_MASKED_STRIDED-NEXT: [[TMP125:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 2197; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP125]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]] 2198; DISABLED_MASKED_STRIDED: pred.store.if41: 2199; DISABLED_MASKED_STRIDED-NEXT: [[TMP126:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 2200; DISABLED_MASKED_STRIDED-NEXT: [[TMP127:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP126]] 2201; DISABLED_MASKED_STRIDED-NEXT: [[TMP128:%.*]] = extractelement <8 x i8> [[TMP100]], i32 6 2202; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP128]], i8* [[TMP127]], align 1 2203; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE42]] 2204; DISABLED_MASKED_STRIDED: pred.store.continue42: 2205; DISABLED_MASKED_STRIDED-NEXT: [[TMP129:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 2206; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP129]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] 2207; DISABLED_MASKED_STRIDED: pred.store.if43: 2208; DISABLED_MASKED_STRIDED-NEXT: [[TMP130:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 2209; DISABLED_MASKED_STRIDED-NEXT: [[TMP131:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP130]] 2210; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP100]], i32 7 2211; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP132]], i8* [[TMP131]], align 1 2212; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] 2213; DISABLED_MASKED_STRIDED: pred.store.continue44: 2214; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = sub <8 x i8> zeroinitializer, [[TMP100]] 2215; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 2216; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP134]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] 2217; DISABLED_MASKED_STRIDED: pred.store.if45: 2218; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i32> [[TMP50]], i32 0 2219; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP135]] 2220; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i8> [[TMP133]], i32 0 2221; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP137]], i8* [[TMP136]], align 1 2222; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] 2223; DISABLED_MASKED_STRIDED: pred.store.continue46: 2224; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 2225; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP138]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] 2226; DISABLED_MASKED_STRIDED: pred.store.if47: 2227; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i32> [[TMP50]], i32 1 2228; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP139]] 2229; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP133]], i32 1 2230; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], i8* [[TMP140]], align 1 2231; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] 2232; DISABLED_MASKED_STRIDED: pred.store.continue48: 2233; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 2234; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP142]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] 2235; DISABLED_MASKED_STRIDED: pred.store.if49: 2236; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i32> [[TMP50]], i32 2 2237; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP143]] 2238; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i8> [[TMP133]], i32 2 2239; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP145]], i8* [[TMP144]], align 1 2240; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] 2241; DISABLED_MASKED_STRIDED: pred.store.continue50: 2242; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 2243; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP146]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] 2244; DISABLED_MASKED_STRIDED: pred.store.if51: 2245; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i32> [[TMP50]], i32 3 2246; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP147]] 2247; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i8> [[TMP133]], i32 3 2248; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP149]], i8* [[TMP148]], align 1 2249; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] 2250; DISABLED_MASKED_STRIDED: pred.store.continue52: 2251; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 2252; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP150]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] 2253; DISABLED_MASKED_STRIDED: pred.store.if53: 2254; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i32> [[TMP50]], i32 4 2255; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP151]] 2256; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP133]], i32 4 2257; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], i8* [[TMP152]], align 1 2258; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] 2259; DISABLED_MASKED_STRIDED: pred.store.continue54: 2260; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 2261; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP154]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] 2262; DISABLED_MASKED_STRIDED: pred.store.if55: 2263; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i32> [[TMP50]], i32 5 2264; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP155]] 2265; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i8> [[TMP133]], i32 5 2266; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP157]], i8* [[TMP156]], align 1 2267; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] 2268; DISABLED_MASKED_STRIDED: pred.store.continue56: 2269; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 2270; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP158]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] 2271; DISABLED_MASKED_STRIDED: pred.store.if57: 2272; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i32> [[TMP50]], i32 6 2273; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP159]] 2274; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i8> [[TMP133]], i32 6 2275; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP161]], i8* [[TMP160]], align 1 2276; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] 2277; DISABLED_MASKED_STRIDED: pred.store.continue58: 2278; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 2279; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP162]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] 2280; DISABLED_MASKED_STRIDED: pred.store.if59: 2281; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i32> [[TMP50]], i32 7 2282; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = getelementptr inbounds i8, i8* [[Q]], i32 [[TMP163]] 2283; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = extractelement <8 x i8> [[TMP133]], i32 7 2284; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP165]], i8* [[TMP164]], align 1 2285; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] 2286; DISABLED_MASKED_STRIDED: pred.store.continue60: 2287; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 2288; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> 2289; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2290; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP166]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !9 2291; DISABLED_MASKED_STRIDED: for.end: 2292; DISABLED_MASKED_STRIDED-NEXT: ret void 2293; 2294; ENABLED_MASKED_STRIDED-LABEL: @unconditional_masked_strided2_unknown_tc( 2295; ENABLED_MASKED_STRIDED-NEXT: entry: 2296; ENABLED_MASKED_STRIDED-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[N:%.*]], 0 2297; ENABLED_MASKED_STRIDED-NEXT: br i1 [[CMP20]], label [[VECTOR_PH:%.*]], label [[FOR_END:%.*]] 2298; ENABLED_MASKED_STRIDED: vector.ph: 2299; ENABLED_MASKED_STRIDED-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 7 2300; ENABLED_MASKED_STRIDED-NEXT: [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8 2301; ENABLED_MASKED_STRIDED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1 2302; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0 2303; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer 2304; ENABLED_MASKED_STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] 2305; ENABLED_MASKED_STRIDED: vector.body: 2306; ENABLED_MASKED_STRIDED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2307; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0 2308; ENABLED_MASKED_STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer 2309; ENABLED_MASKED_STRIDED-NEXT: [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2310; ENABLED_MASKED_STRIDED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT]] 2311; ENABLED_MASKED_STRIDED-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1 2312; ENABLED_MASKED_STRIDED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP1]] 2313; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* 2314; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 2315; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> undef) 2316; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 2317; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 2318; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], 1 2319; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp slt <8 x i8> [[STRIDED_VEC]], [[STRIDED_VEC3]] 2320; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i8> [[STRIDED_VEC3]], <8 x i8> [[STRIDED_VEC]] 2321; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] 2322; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[Q:%.*]], i32 -1 2323; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 [[TMP4]] 2324; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* 2325; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 2326; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[TMP10]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) 2327; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 2328; ENABLED_MASKED_STRIDED-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2329; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop !11 2330; ENABLED_MASKED_STRIDED: for.end: 2331; ENABLED_MASKED_STRIDED-NEXT: ret void 2332; 2333entry: 2334 %cmp20 = icmp sgt i32 %n, 0 2335 br i1 %cmp20, label %for.body.preheader, label %for.end 2336 2337for.body.preheader: 2338 br label %for.body 2339 2340for.body: 2341 %ix.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] 2342 %mul = shl nuw nsw i32 %ix.021, 1 2343 %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul 2344 %0 = load i8, i8* %arrayidx, align 1 2345 %add = or i32 %mul, 1 2346 %arrayidx2 = getelementptr inbounds i8, i8* %p, i32 %add 2347 %1 = load i8, i8* %arrayidx2, align 1 2348 %cmp.i = icmp slt i8 %0, %1 2349 %spec.select.i = select i1 %cmp.i, i8 %1, i8 %0 2350 %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul 2351 store i8 %spec.select.i, i8* %arrayidx4, align 1 2352 %sub = sub i8 0, %spec.select.i 2353 %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add 2354 store i8 %sub, i8* %arrayidx8, align 1 2355 %inc = add nuw nsw i32 %ix.021, 1 2356 %exitcond = icmp eq i32 %inc, %n 2357 br i1 %exitcond, label %for.end.loopexit, label %for.body 2358 2359for.end.loopexit: 2360 br label %for.end 2361 2362for.end: 2363 ret void 2364} 2365 2366