; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -indvars -S -indvars-predicate-loops=0 | FileCheck %s

; Make sure that indvars can perform LFTR without a canonical IV.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Perform LFTR using the original pointer-type IV.

declare void @use(double %x)

; for(char* p = base; p < base + n; ++p) {
;    *p = p-base;
; }
define void @ptriv(i8* %base, i32 %n) nounwind {
; CHECK-LABEL: @ptriv(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[N:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i8* [[BASE]], [[ADD_PTR]]
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[P_02:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i8* [[P_02]] to i64
; CHECK-NEXT:    [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[BASE]] to i64
; CHECK-NEXT:    [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8
; CHECK-NEXT:    store i8 [[CONV]], i8* [[P_02]]
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[ADD_PTR]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %idx.ext = sext i32 %n to i64
  %add.ptr = getelementptr inbounds i8, i8* %base, i64 %idx.ext
  %cmp1 = icmp ult i8* %base, %add.ptr
  br i1 %cmp1, label %for.body, label %for.end

for.body:
  %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
  ; cruft to make the IV useful
  %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
  %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
  %conv = trunc i64 %sub.ptr.sub to i8
  store i8 %conv, i8* %p.02
  %incdec.ptr = getelementptr inbounds i8, i8* %p.02, i32 1
  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; This test checks that SCEVExpander can handle an outer loop that has been
; simplified, and as a result the inner loop's exit test will be rewritten.
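;
; A rough C equivalent of the loop nest below:
;   for (int i = 0; i < arg - 1; ++i)
;     for (int j = 0; j < arg - i - 1; ++j)
;       ;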
define void @expandOuterRecurrence(i32 %arg) nounwind {
; CHECK-LABEL: @expandOuterRecurrence(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
; CHECK-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       outer.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG]], -1
; CHECK-NEXT:    br label [[OUTER:%.*]]
; CHECK:       outer:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
; CHECK-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
; CHECK-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
; CHECK:       inner.ph:
; CHECK-NEXT:    br label [[INNER:%.*]]
; CHECK:       inner:
; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
; CHECK-NEXT:    [[J_INC]] = add nuw nsw i32 [[J]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
; CHECK:       outer.inc.loopexit:
; CHECK-NEXT:    br label [[OUTER_INC]]
; CHECK:       outer.inc:
; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
; CHECK-NEXT:    br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %sub1 = sub nsw i32 %arg, 1
  %cmp1 = icmp slt i32 0, %sub1
  br i1 %cmp1, label %outer, label %exit

outer:
  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
  %sub2 = sub nsw i32 %arg, %i
  %sub3 = sub nsw i32 %sub2, 1
  %cmp2 = icmp slt i32 0, %sub3
  br i1 %cmp2, label %inner.ph, label %outer.inc

inner.ph:
  br label %inner

inner:
  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
  %j.inc = add nsw i32 %j, 1
  %cmp3 = icmp slt i32 %j.inc, %sub3
  br i1 %cmp3, label %inner, label %outer.inc

outer.inc:
  %i.inc = add nsw i32 %i, 1
  %cmp4 = icmp slt i32 %i.inc, %sub1
  br i1 %cmp4, label %outer, label %exit

exit:
  ret void
}

; Force SCEVExpander to look for an existing well-formed phi.
; Perform LFTR without generating extra preheader code.
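;
; A rough C equivalent of the guarded loop below (use() stands for @use):
;   if (irow > 1) {
;     int i = 0, rowidx = 0;
;     do {
;       use(matrix[rowidx + i]);
;       use(vector[i]);
;       rowidx += ilead;
;     } while (++i < irow);
;   }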
define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
;
; CHECK-LABEL: @guardedloop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 1, [[IROW:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]]
; CHECK:       loop.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[ILEAD:%.*]] to i64
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[IROW]] to i64
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV2]]
; CHECK-NEXT:    [[MATRIXP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[MATRIX:%.*]], i32 0, i64 [[TMP1]]
; CHECK-NEXT:    [[V1:%.*]] = load double, double* [[MATRIXP]]
; CHECK-NEXT:    call void @use(double [[V1]])
; CHECK-NEXT:    [[VECTORP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[VECTOR:%.*]], i32 0, i64 [[INDVARS_IV2]]
; CHECK-NEXT:    [[V2:%.*]] = load double, double* [[VECTORP]]
; CHECK-NEXT:    call void @use(double [[V2]])
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN_LOOPEXIT:%.*]]
; CHECK:       return.loopexit:
; CHECK-NEXT:    br label [[RETURN]]
; CHECK:       return:
; CHECK-NEXT:    ret void
;
  i32 %irow, i32 %ilead) nounwind {
entry:
  %cmp = icmp slt i32 1, %irow
  br i1 %cmp, label %loop, label %return

loop:
  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
  %diagidx = add nsw i32 %rowidx, %i
  %diagidxw = sext i32 %diagidx to i64
  %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
  %v1 = load double, double* %matrixp
  call void @use(double %v1)
  %iw = sext i32 %i to i64
  %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
  %v2 = load double, double* %vectorp
  call void @use(double %v2)
  %row.inc = add nsw i32 %rowidx, %ilead
  %i.inc = add nsw i32 %i, 1
  %cmp196 = icmp slt i32 %i.inc, %irow
  br i1 %cmp196, label %loop, label %return

return:
  ret void
}

; Avoid generating extra code to materialize a trip count. Skip LFTR.
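;
; This is the same loop as @guardedloop, but with no entry guard and no @use
; calls, so the loads are dead and only an IV and the exit test remain after
; indvars; roughly:
;   int i = 0, rowidx = 0;
;   do { rowidx += ilead; } while (++i < irow);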
define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
;
; CHECK-LABEL: @unguardedloop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[IROW:%.*]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[IROW]], i32 1
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN:%.*]]
; CHECK:       return:
; CHECK-NEXT:    ret void
;
  i32 %irow, i32 %ilead) nounwind {
entry:
  br label %loop

loop:
  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
  %diagidx = add nsw i32 %rowidx, %i
  %diagidxw = sext i32 %diagidx to i64
  %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
  %v1 = load double, double* %matrixp
  %iw = sext i32 %i to i64
  %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
  %v2 = load double, double* %vectorp
  %row.inc = add nsw i32 %rowidx, %ilead
  %i.inc = add nsw i32 %i, 1
  %cmp196 = icmp slt i32 %i.inc, %irow
  br i1 %cmp196, label %loop, label %return

return:
  ret void
}

; Remove %i which is only used by the exit test.
; Verify that SCEV can still compute a backedge count from the sign
; extended %n, used for pointer comparison by LFTR.
;
; TODO: Fix for PR13371 currently makes this impossible. See
; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules.
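;
; A rough C equivalent of the loop below (unsigned compares, as in the IR):
;   char *aptr = base + (long)x + (long)y;
;   for (unsigned i = x; i < (unsigned)(x + n); ++i)
;     *aptr++ = 3;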
define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
; CHECK-LABEL: @geplftr(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[X_EXT:%.*]] = sext i32 [[X:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[X_EXT]]
; CHECK-NEXT:    [[Y_EXT:%.*]] = sext i32 [[Y:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR10:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[Y_EXT]]
; CHECK-NEXT:    [[LIM:%.*]] = add i32 [[X]], [[N:%.*]]
; CHECK-NEXT:    [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]]
; CHECK-NEXT:    br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.preheader:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT:    [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1
; CHECK-NEXT:    store i8 3, i8* [[APTR]]
; CHECK-NEXT:    [[INC]] = add nuw i32 [[I]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %x.ext = sext i32 %x to i64
  %add.ptr = getelementptr inbounds i8, i8* %base, i64 %x.ext
  %y.ext = sext i32 %y to i64
  %add.ptr10 = getelementptr inbounds i8, i8* %add.ptr, i64 %y.ext
  %lim = add i32 %x, %n
  %cmp.ph = icmp ult i32 %x, %lim
  br i1 %cmp.ph, label %loop, label %exit
loop:
  %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
  %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
  %incdec.ptr = getelementptr inbounds i8, i8* %aptr, i32 1
  store i8 3, i8* %aptr
  %inc = add i32 %i, 1
  %cmp = icmp ult i32 %inc, %lim
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Exercise backedge taken count verification with a never-taken loop.
define void @nevertaken() nounwind uwtable ssp {
; CHECK-LABEL: @nevertaken(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
  %inc = add nsw i32 %i, 1
  %cmp = icmp sle i32 %inc, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
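;
; A rough C equivalent of the loop below (base points to a 256-byte array):
;   for (char *p = &(*base)[0]; p < &(*base)[n]; ++p)
;     *p = 3;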
define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
; CHECK-LABEL: @aryptriv(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IVSTART:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* [[BASE:%.*]], i32 0, i32 0
; CHECK-NEXT:    [[IVEND:%.*]] = getelementptr inbounds [256 x i8], [256 x i8]* [[BASE]], i32 0, i32 [[N:%.*]]
; CHECK-NEXT:    [[CMP_PH:%.*]] = icmp ult i8* [[IVSTART]], [[IVEND]]
; CHECK-NEXT:    br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK:       loop.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr [256 x i8], [256 x i8]* [[BASE]], i64 0, i64 [[TMP0]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ]
; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1
; CHECK-NEXT:    store i8 3, i8* [[APTR]]
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %ivstart = getelementptr inbounds [256 x i8], [256 x i8]* %base, i32 0, i32 0
  %ivend = getelementptr inbounds [256 x i8], [256 x i8]* %base, i32 0, i32 %n
  %cmp.ph = icmp ult i8* %ivstart, %ivend
  br i1 %cmp.ph, label %loop, label %exit

loop:
  %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
  %incdec.ptr = getelementptr inbounds i8, i8* %aptr, i32 1
  store i8 3, i8* %aptr
  %cmp = icmp ult i8* %incdec.ptr, %ivend
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}