; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-vector-width=4 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled -S %s -o - | FileCheck %s

; Stride 1: vectorized with tail-folding (masked load/store via get.active.lane.mask).
define void @test_stride1_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride1_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP5]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP6]], <4 x i32>* [[TMP9]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP11]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, 1
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride -1 (reversed access): vectorized without tail-folding, with a SCEV
; overflow check, reversed wide load and an epilogue scalar loop.
define void @test_stride-1_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride-1_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP1:%.*]] = add i32 2, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 2, [[MUL_RESULT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP1]], 2
; CHECK-NEXT: [[TMP5:%.*]] = select i1 true, i1 [[TMP3]], i1 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 false, [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP8]], -1
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i32 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 -3
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[REVERSE]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP18]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], -1
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP20]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, -1
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride 2: vectorized as an interleaved (wide <8 x i32>) load plus a
; deinterleaving shuffle, no tail-folding.
define void @test_stride2_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
;
; CHECK-LABEL: @test_stride2_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i32 4, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[STRIDED_VEC]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 2
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP13]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, 2
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride 3: tail-folded with a masked gather for the strided load.
define void @test_stride3_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride3_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 3
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP12]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP9:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, 3
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride 4: tail-folded with a masked gather, same shape as stride 3.
define void @test_stride4_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride4_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 4
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP12]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP11:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, 4
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Loop-invariant stride argument: tail-folded after a runtime stride==1 check.
define void @test_stride_loopinvar_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n, i32 %stride) {
; CHECK-LABEL: @test_stride_loopinvar_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STRIDE:%.*]], 1
; CHECK-NEXT: [[TMP0:%.*]] = or i1 false, [[IDENT_CHECK]]
; CHECK-NEXT: br i1 [[TMP0]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N:%.*]], 3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP1]], i32 [[N]])
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i32 [[TMP1]], [[STRIDE]]
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP12]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP13:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i32 %i.023, %stride
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride is itself an induction (3, 11, 19, ... step 8): vectorized with an
; unconditional gather, no tail-folding.
define void @test_stride_noninvar_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride_noninvar_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 3, i32 11, i32 19, i32 27>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND2]]
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i32> [[TMP5]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP6]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP7]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 32, i32 32, i32 32, i32 32>
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[STRIDE:%.*]] = phi i32 [ [[NEXT_STRIDE:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP13]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[NEXT_STRIDE]] = add nuw nsw i32 [[STRIDE]], 8
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %stride = phi i32 [ %next.stride, %for.body ], [ 3, %entry ]
  %mul = mul nuw nsw i32 %i.023, %stride
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %next.stride = add nuw nsw i32 %stride, 8
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride multiplied by 8 each iteration (not an add-recurrence): the loop is
; left scalar.
define void @test_stride_noninvar2_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test_stride_noninvar2_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[STRIDE:%.*]] = phi i32 [ [[NEXT_STRIDE:%.*]], [[FOR_BODY]] ], [ 3, [[ENTRY]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP0]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[NEXT_STRIDE]] = mul nuw nsw i32 [[STRIDE]], 8
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END:%.*]], label [[FOR_BODY]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %stride = phi i32 [ %next.stride, %for.body ], [ 3, %entry ]
  %mul = mul nuw nsw i32 %i.023, %stride
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5, %0
  %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023
  store i32 %add7, i32* %arrayidx9, align 4
  %inc = add nuw nsw i32 %i.023, 1
  %next.stride = mul nuw nsw i32 %stride, 8
  %exitcond.not = icmp eq i32 %inc, %n
  br i1 %exitcond.not, label %end, label %for.body
end: ; preds = %end, %entry
  ret void
}

; Stride advanced by a variable step %x each iteration: vectorized with a
; second vector induction and an unconditional gather.
define void @test_stride_noninvar3_4i32(i32* readonly %data, i32* noalias nocapture %dst, i32 %n, i32 %x) {
; CHECK-LABEL: @test_stride_noninvar3_4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[N_VEC]], [[X:%.*]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 3, [[TMP0]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[X]], i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[X]], 4
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND4]]
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], <4 x i32> [[TMP8]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP9]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i32 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[STRIDE:%.*]] = phi i32 [ [[NEXT_STRIDE:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i32 [[ADD5]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP15]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[NEXT_STRIDE]] = add nuw nsw i32 [[STRIDE]], [[X]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %stride = phi i32 [ %next.stride, %for.body ], [ 3, %entry ]
  %mul = mul nuw nsw i32 %i.023, %stride
  %add5 = add nuw nsw i32 %mul, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %data, i32 %add5
  %0 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 5,
%0 621 %arrayidx9 = getelementptr inbounds i32, i32* %dst, i32 %i.023 622 store i32 %add7, i32* %arrayidx9, align 4 623 %inc = add nuw nsw i32 %i.023, 1 624 %next.stride = add nuw nsw i32 %stride, %x 625 %exitcond.not = icmp eq i32 %inc, %n 626 br i1 %exitcond.not, label %end, label %for.body 627end: ; preds = %end, %entry 628 ret void 629} 630 631declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) 632declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) 633declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) 634declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) 635declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) 636