; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -indvars -loop-unroll -mtriple=thumbv8m.main %s -S -o - | FileCheck %s

; Overview
; --------
; The run line above pipes this file through indvars followed by loop-unroll
; for a thumbv8m.main triple and verifies the result with FileCheck.
; @test uses attribute group #0, which is { minsize optsize }.
;
; The input is a loop nest with one outer loop (for.body) and two sibling
; inner loops (for.body3, stepping by 4, and for.body29, stepping by 1).
; The expected output recorded below:
;   * keeps all three loops as loops (each still branches back to its own
;     header block),
;   * gains for.body.preheader, for.cond.cleanup.loopexit,
;     for.body3.preheader and for.end.loopexit blocks,
;   * has all three "scevgep" exit-value GEPs in for.end40.loopexit, whereas
;     the input has %scevgep93 in for.body29.preheader.
; NOTE(review): presumably this guards the size-aware cost decisions of these
; passes for Thumb targets -- confirm against the change that added the test.
;
; The assertion block inside the function is autogenerated; regenerate it with
; utils/update_test_checks.py instead of editing it by hand.

define dso_local arm_aapcscc void @test(i32* nocapture %pDest, i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32 %blkCnt) local_unnamed_addr #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP88:%.*]] = icmp eq i32 [[BLKCNT:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP88]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup.loopexit:
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[I_092:%.*]] = phi i32 [ [[INC42:%.*]], [[FOR_END40:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[PDEST_ADDR_091:%.*]] = phi i32* [ [[PDEST_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PDEST:%.*]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCA_ADDR_090:%.*]] = phi i16* [ [[PSRCA_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCA:%.*]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCB_ADDR_089:%.*]] = phi i16* [ [[PSRCB_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCB:%.*]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[I_092]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 3
; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 2147483644
; CHECK-NEXT:    [[CMP272:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[CMP272]], label [[FOR_END:%.*]], label [[FOR_BODY3_PREHEADER:%.*]]
; CHECK:       for.body3.preheader:
; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
; CHECK:       for.body3:
; CHECK-NEXT:    [[J_076:%.*]] = phi i32 [ [[ADD24:%.*]], [[FOR_BODY3]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
; CHECK-NEXT:    [[PDEST_ADDR_175:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY3]] ], [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[FOR_BODY3]] ], [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[ADD_PTR23:%.*]], [[FOR_BODY3]] ], [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2
; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP4]] to i32
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV5]], [[CONV]]
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP5]] to i32
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2
; CHECK-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP6]] to i32
; CHECK-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[CONV9]], [[CONV7]]
; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2
; CHECK-NEXT:    [[CONV12:%.*]] = sext i16 [[TMP7]] to i32
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2
; CHECK-NEXT:    [[CONV14:%.*]] = sext i16 [[TMP8]] to i32
; CHECK-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[CONV14]], [[CONV12]]
; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 3
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2
; CHECK-NEXT:    [[CONV18:%.*]] = sext i16 [[TMP9]] to i32
; CHECK-NEXT:    [[ADD21:%.*]] = add i32 [[MUL10]], [[MUL]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[ADD21]], [[CONV14]]
; CHECK-NEXT:    [[ADD16:%.*]] = add i32 [[ADD]], [[MUL15]]
; CHECK-NEXT:    [[ADD22:%.*]] = add i32 [[ADD16]], [[CONV18]]
; CHECK-NEXT:    store i32 [[ADD22]], i32* [[PDEST_ADDR_175]], align 4
; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4
; CHECK-NEXT:    [[ADD_PTR23]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1
; CHECK-NEXT:    [[ADD24]] = add nuw nsw i32 [[J_076]], 4
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[ADD24]], [[TMP0]]
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[FOR_BODY3]] ]
; CHECK-NEXT:    [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23]], [[FOR_BODY3]] ]
; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR]], [[FOR_BODY3]] ]
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[PSRCB_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY]] ], [ [[ADD_PTR23_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    [[PSRCA_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    [[PDEST_ADDR_1_LCSSA:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY]] ], [ [[INCDEC_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP2]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    [[REM:%.*]] = and i32 [[TMP0]], 3
; CHECK-NEXT:    [[ADD25:%.*]] = or i32 [[J_0_LCSSA]], [[REM]]
; CHECK-NEXT:    [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
; CHECK-NEXT:    br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
; CHECK:       for.body29.preheader:
; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
; CHECK-NEXT:    br label [[FOR_BODY29:%.*]]
; CHECK:       for.body29:
; CHECK-NEXT:    [[J_184:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY29]] ], [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
; CHECK-NEXT:    [[PDEST_ADDR_283:%.*]] = phi i32* [ [[INCDEC_PTR38:%.*]], [[FOR_BODY29]] ], [ [[PDEST_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCA_ADDR_282:%.*]] = phi i16* [ [[INCDEC_PTR36:%.*]], [[FOR_BODY29]] ], [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
; CHECK-NEXT:    [[PSRCB_ADDR_281:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[FOR_BODY29]] ], [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 [[J_184]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2
; CHECK-NEXT:    [[CONV31:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 [[J_184]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
; CHECK-NEXT:    [[CONV33:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[CONV33]], [[CONV31]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4
; CHECK-NEXT:    [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP13]]
; CHECK-NEXT:    store i32 [[ADD35]], i32* [[PDEST_ADDR_283]], align 4
; CHECK-NEXT:    [[INCDEC_PTR36]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 1
; CHECK-NEXT:    [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 1
; CHECK-NEXT:    [[INCDEC_PTR38]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_283]], i32 1
; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_184]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ADD25]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29]]
; CHECK:       for.end40.loopexit:
; CHECK-NEXT:    [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT:    [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP10]]
; CHECK-NEXT:    br label [[FOR_END40]]
; CHECK:       for.end40:
; CHECK-NEXT:    [[PSRCB_ADDR_2_LCSSA]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP93]], [[FOR_END40_LOOPEXIT]] ]
; CHECK-NEXT:    [[PSRCA_ADDR_2_LCSSA]] = phi i16* [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP]], [[FOR_END40_LOOPEXIT]] ]
; CHECK-NEXT:    [[PDEST_ADDR_2_LCSSA]] = phi i32* [ [[PDEST_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP94]], [[FOR_END40_LOOPEXIT]] ]
; CHECK-NEXT:    [[INC42]] = add nuw i32 [[I_092]], 1
; CHECK-NEXT:    [[EXITCOND95:%.*]] = icmp eq i32 [[INC42]], [[BLKCNT]]
; CHECK-NEXT:    br i1 [[EXITCOND95]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
;
; Guard: skip the whole nest when %blkCnt is zero.
entry:
  %cmp88 = icmp eq i32 %blkCnt, 0
  br i1 %cmp88, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.end40, %entry
  ret void

; Outer loop: %i.092 counts from 0 until %inc42 equals %blkCnt (see
; for.end40). The destination and both source pointers are loop-carried.
for.body:                                         ; preds = %for.end40, %entry
  %i.092 = phi i32 [ %inc42, %for.end40 ], [ 0, %entry ]
  %pDest.addr.091 = phi i32* [ %pDest.addr.2.lcssa, %for.end40 ], [ %pDest, %entry ]
  %pSrcA.addr.090 = phi i16* [ %pSrcA.addr.2.lcssa, %for.end40 ], [ %pSrcA, %entry ]
  %pSrcB.addr.089 = phi i16* [ %pSrcB.addr.2.lcssa, %for.end40 ], [ %pSrcB, %entry ]
  ; %0 = i >> 2 is the bound used by both inner loops.
  %0 = lshr i32 %i.092, 2
  ; %2 = (%0 + 3) & 0x7FFFFFFC, i.e. %0 rounded up to a multiple of 4; it is
  ; the value %j.0.lcssa takes after the first inner loop has run.
  %1 = add nuw nsw i32 %0, 3
  %2 = and i32 %1, 2147483644
  ; Skip the first inner loop entirely when %0 is zero.
  %cmp272 = icmp eq i32 %0, 0
  br i1 %cmp272, label %for.end, label %for.body3

; First inner loop: %j.076 starts at 0 and steps by 4 while %add24 <u %0.
; Each iteration reads i16 elements at offsets 0, 1, 2, 3 from the current
; pSrcA pointer and offsets 0, 1, 3 from the current pSrcB pointer, stores one
; i32 through the current pDest pointer, then advances the source pointers by
; 4 elements and the destination pointer by 1 element.
for.body3:                                        ; preds = %for.body3, %for.body
  %j.076 = phi i32 [ %add24, %for.body3 ], [ 0, %for.body ]
  %pDest.addr.175 = phi i32* [ %incdec.ptr, %for.body3 ], [ %pDest.addr.091, %for.body ]
  %pSrcA.addr.174 = phi i16* [ %add.ptr, %for.body3 ], [ %pSrcA.addr.090, %for.body ]
  %pSrcB.addr.173 = phi i16* [ %add.ptr23, %for.body3 ], [ %pSrcB.addr.089, %for.body ]
  %3 = load i16, i16* %pSrcA.addr.174, align 2
  %conv = sext i16 %3 to i32
  %4 = load i16, i16* %pSrcB.addr.173, align 2
  %conv5 = sext i16 %4 to i32
  %mul = mul nsw i32 %conv5, %conv
  %arrayidx6 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 1
  %5 = load i16, i16* %arrayidx6, align 2
  %conv7 = sext i16 %5 to i32
  %arrayidx8 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 1
  %6 = load i16, i16* %arrayidx8, align 2
  %conv9 = sext i16 %6 to i32
  %mul10 = mul nsw i32 %conv9, %conv7
  %arrayidx11 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 2
  %7 = load i16, i16* %arrayidx11, align 2
  %conv12 = sext i16 %7 to i32
  %arrayidx13 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 3
  %8 = load i16, i16* %arrayidx13, align 2
  %conv14 = sext i16 %8 to i32
  %mul15 = mul nsw i32 %conv14, %conv12
  %arrayidx17 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 3
  %9 = load i16, i16* %arrayidx17, align 2
  %conv18 = sext i16 %9 to i32
  ; Stored value: %mul10 + %mul + %conv14 + %mul15 + %conv18.
  %add21 = add i32 %mul10, %mul
  %add = add i32 %add21, %conv14
  %add16 = add i32 %add, %mul15
  %add22 = add i32 %add16, %conv18
  store i32 %add22, i32* %pDest.addr.175, align 4
  %add.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 4
  %add.ptr23 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 4
  %incdec.ptr = getelementptr inbounds i32, i32* %pDest.addr.175, i32 1
  %add24 = add nuw nsw i32 %j.076, 4
  %cmp2 = icmp ult i32 %add24, %0
  br i1 %cmp2, label %for.body3, label %for.end

; Join after the first inner loop. %j.0.lcssa is 0 when that loop was skipped
; and %2 otherwise; %add25 = %j.0.lcssa | (%0 & 3) is the end index of the
; second inner loop, which only runs when %add25 >u %j.0.lcssa.
for.end:                                          ; preds = %for.body3, %for.body
  %pSrcB.addr.1.lcssa = phi i16* [ %pSrcB.addr.089, %for.body ], [ %add.ptr23, %for.body3 ]
  %pSrcA.addr.1.lcssa = phi i16* [ %pSrcA.addr.090, %for.body ], [ %add.ptr, %for.body3 ]
  %pDest.addr.1.lcssa = phi i32* [ %pDest.addr.091, %for.body ], [ %incdec.ptr, %for.body3 ]
  %j.0.lcssa = phi i32 [ 0, %for.body ], [ %2, %for.body3 ]
  %rem = and i32 %0, 3
  %add25 = or i32 %j.0.lcssa, %rem
  %cmp2780 = icmp ugt i32 %add25, %j.0.lcssa
  br i1 %cmp2780, label %for.body29.preheader, label %for.end40

; %10 = %add25 - %j.0.lcssa, the iteration count of the second inner loop; it
; is the offset used by the three scevgep exit-value GEPs. In this input
; %scevgep93 is computed here, while the expected output places it in
; for.end40.loopexit with the other two.
for.body29.preheader:                             ; preds = %for.end
  %10 = sub nsw i32 %add25, %j.0.lcssa
  %scevgep93 = getelementptr i16, i16* %pSrcB.addr.1.lcssa, i32 %10
  br label %for.body29

; Second inner loop: %j.184 runs from %j.0.lcssa until %inc equals %add25.
; Each iteration multiplies the i16 elements at index %j.184 from the current
; source pointers, adds the product to the i32 loaded through the current
; destination pointer, stores it back, and advances all three pointers by one
; element.
for.body29:                                       ; preds = %for.body29, %for.body29.preheader
  %j.184 = phi i32 [ %inc, %for.body29 ], [ %j.0.lcssa, %for.body29.preheader ]
  %pDest.addr.283 = phi i32* [ %incdec.ptr38, %for.body29 ], [ %pDest.addr.1.lcssa, %for.body29.preheader ]
  %pSrcA.addr.282 = phi i16* [ %incdec.ptr36, %for.body29 ], [ %pSrcA.addr.1.lcssa, %for.body29.preheader ]
  %pSrcB.addr.281 = phi i16* [ %incdec.ptr37, %for.body29 ], [ %pSrcB.addr.1.lcssa, %for.body29.preheader ]
  %arrayidx30 = getelementptr inbounds i16, i16* %pSrcA.addr.282, i32 %j.184
  %11 = load i16, i16* %arrayidx30, align 2
  %conv31 = sext i16 %11 to i32
  %arrayidx32 = getelementptr inbounds i16, i16* %pSrcB.addr.281, i32 %j.184
  %12 = load i16, i16* %arrayidx32, align 2
  %conv33 = sext i16 %12 to i32
  %mul34 = mul nsw i32 %conv33, %conv31
  %13 = load i32, i32* %pDest.addr.283, align 4
  %add35 = add nsw i32 %mul34, %13
  store i32 %add35, i32* %pDest.addr.283, align 4
  %incdec.ptr36 = getelementptr inbounds i16, i16* %pSrcA.addr.282, i32 1
  %incdec.ptr37 = getelementptr inbounds i16, i16* %pSrcB.addr.281, i32 1
  %incdec.ptr38 = getelementptr inbounds i32, i32* %pDest.addr.283, i32 1
  %inc = add nuw i32 %j.184, 1
  %exitcond = icmp eq i32 %inc, %add25
  br i1 %exitcond, label %for.end40.loopexit, label %for.body29

; Exit values of the pSrcA and pDest pointers after the second inner loop,
; computed as base + %10 rather than taken from the in-loop increments.
for.end40.loopexit:                               ; preds = %for.body29
  %scevgep = getelementptr i16, i16* %pSrcA.addr.1.lcssa, i32 %10
  %scevgep94 = getelementptr i32, i32* %pDest.addr.1.lcssa, i32 %10
  br label %for.end40

; Outer-loop latch: select the pointer values to carry into the next outer
; iteration and leave once %inc42 equals %blkCnt.
for.end40:                                        ; preds = %for.end40.loopexit, %for.end
  %pSrcB.addr.2.lcssa = phi i16* [ %pSrcB.addr.1.lcssa, %for.end ], [ %scevgep93, %for.end40.loopexit ]
  %pSrcA.addr.2.lcssa = phi i16* [ %pSrcA.addr.1.lcssa, %for.end ], [ %scevgep, %for.end40.loopexit ]
  %pDest.addr.2.lcssa = phi i32* [ %pDest.addr.1.lcssa, %for.end ], [ %scevgep94, %for.end40.loopexit ]
  %inc42 = add nuw i32 %i.092, 1
  %exitcond95 = icmp eq i32 %inc42, %blkCnt
  br i1 %exitcond95, label %for.cond.cleanup, label %for.body
}

; Attribute group applied to @test.
attributes #0 = { minsize optsize }