; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-unroll -mtriple=arm64-apple-iphoneos -S %s | FileCheck %s

; Check that we unroll even with optsize if the result is smaller, either
; because we have single-iteration loops or bodies with constant folding
; opportunities after fully unrolling.

declare i32 @get()

; The loop exits as soon as %indvars.iv.next equals 1, i.e. after a single
; iteration. The expected output is straight-line code without the phi, the
; induction-variable increment or the back edge.
define void @fully_unrolled_single_iteration(i32* %src) #0 {
; CHECK-LABEL: @fully_unrolled_single_iteration(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARR:%.*]] = alloca [4 x i32], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[SRC:%.*]]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 0
; CHECK-NEXT:    store i32 [[V]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[PTR:%.*]] = bitcast [4 x i32]* [[ARR]] to i32*
; CHECK-NEXT:    call void @use(i32* nonnull [[PTR]])
; CHECK-NEXT:    ret void
;
entry:
  %arr = alloca [4 x i32], align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %src.idx = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
  %v = load i32, i32* %src.idx
  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 %indvars.iv
  store i32 %v, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %ptr = bitcast [4 x i32]* %arr to i32*
  call void @use(i32* nonnull %ptr)
  ret void
}

; The loop runs for %indvars.iv = 0..3 (it exits when %indvars.iv equals 3).
; Once unrolled, the trunc/shl/shl/or chain folds to a constant per iteration
; (16, 4104, 1048592, 268435480), leaving one GEP and one store each.
define void @fully_unrolled_smaller() #0 {
; CHECK-LABEL: @fully_unrolled_smaller(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARR:%.*]] = alloca [4 x i32], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 0
; CHECK-NEXT:    store i32 16, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 1
; CHECK-NEXT:    store i32 4104, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 2
; CHECK-NEXT:    store i32 1048592, i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 3
; CHECK-NEXT:    store i32 268435480, i32* [[ARRAYIDX_3]], align 4
; CHECK-NEXT:    [[PTR:%.*]] = bitcast [4 x i32]* [[ARR]] to i32*
; CHECK-NEXT:    call void @use(i32* nonnull [[PTR]])
; CHECK-NEXT:    ret void
;
entry:
  %arr = alloca [4 x i32], align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %indvars.iv.tr = trunc i64 %indvars.iv to i32
  %shl.0 = shl i32 %indvars.iv.tr, 3
  %shl.1 = shl i32 16, %shl.0
  %or = or i32 %shl.1, %shl.0
  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 %indvars.iv
  store i32 %or, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv, 3
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %ptr = bitcast [4 x i32]* %arr to i32*
  call void @use(i32* nonnull %ptr)
  ret void
}

; Same body as @fully_unrolled_smaller, but the function carries attribute
; group #1 (minsize optsize) instead of #0 (optsize). The expected output is
; the same fully unrolled, constant-folded sequence.
define void @fully_unrolled_smaller_Oz() #1 {
; CHECK-LABEL: @fully_unrolled_smaller_Oz(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARR:%.*]] = alloca [4 x i32], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 0
; CHECK-NEXT:    store i32 16, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 1
; CHECK-NEXT:    store i32 4104, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 2
; CHECK-NEXT:    store i32 1048592, i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 3
; CHECK-NEXT:    store i32 268435480, i32* [[ARRAYIDX_3]], align 4
; CHECK-NEXT:    [[PTR:%.*]] = bitcast [4 x i32]* [[ARR]] to i32*
; CHECK-NEXT:    call void @use(i32* nonnull [[PTR]])
; CHECK-NEXT:    ret void
;
entry:
  %arr = alloca [4 x i32], align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %indvars.iv.tr = trunc i64 %indvars.iv to i32
  %shl.0 = shl i32 %indvars.iv.tr, 3
  %shl.1 = shl i32 16, %shl.0
  %or = or i32 %shl.1, %shl.0
  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 %indvars.iv
  store i32 %or, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv, 7
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %ptr = bitcast [4 x i32]* %arr to i32*
  call void @use(i32* nonnull %ptr)
  ret void
}

; Same body again, but the loop exits when %indvars.iv equals 7, so it runs
; twice as many iterations. The expected output keeps the loop intact (phi,
; shifts, compare and back edge are all still present).
define void @fully_unrolled_bigger() #0 {
; CHECK-LABEL: @fully_unrolled_bigger(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARR:%.*]] = alloca [4 x i32], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[SHL_0:%.*]] = shl i32 [[INDVARS_IV_TR]], 3
; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 16, [[SHL_0]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL_1]], [[SHL_0]]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[ARR]], i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i32 [[OR]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 7
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[PTR:%.*]] = bitcast [4 x i32]* [[ARR]] to i32*
; CHECK-NEXT:    call void @use(i32* nonnull [[PTR]])
; CHECK-NEXT:    ret void
;
entry:
  %arr = alloca [4 x i32], align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %indvars.iv.tr = trunc i64 %indvars.iv to i32
  %shl.0 = shl i32 %indvars.iv.tr, 3
  %shl.1 = shl i32 16, %shl.0
  %or = or i32 %shl.1, %shl.0
  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %arr, i64 0, i64 %indvars.iv
  store i32 %or, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv, 7
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %ptr = bitcast [4 x i32]* %arr to i32*
  call void @use(i32* nonnull %ptr)
  ret void
}

declare void @use(i32*)

; #0 marks functions as optimize-for-size; #1 additionally requests minsize.
attributes #0 = { optsize }
attributes #1 = { minsize optsize }