; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Read-only table indexed by the induction variable in every test below. Its
; elements alternate 0, 1, so half of the loads from it yield zero once the
; index is a known constant.
@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1], align 16

; We should be able to propagate constant data through different types of
; casts. For example, in this test we have a load, which becomes constant after
; unrolling, and which is then truncated to i8. The truncated value is also a
; constant, which can be used in further simplifications.
;
; We expect this loop to be unrolled, because in this case the load would
; become constant, which is 0 in many cases, and which, in its turn, helps to
; simplify the following multiplication and addition. In total, unrolling
; should help to optimize ~60% of all instructions in this case.
;
; The checks below require that no conditional branch is left between the
; function label and the return, and that the returned value is an SSA value.
;
; CHECK-LABEL: @const_load_trunc
; CHECK-NOT: br i1
; CHECK: ret i8 %
define i8 @const_load_trunc(i32* noalias nocapture readonly %src) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  ; %iv counts the iterations; %r carries the running i8 accumulator.
  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
  %r = phi i8 [ 0, %entry ], [ %add, %loop ]
  ; Element of the caller-provided array: unknown at compile time.
  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
  %src_element = load i32, i32* %arrayidx, align 4
  ; Element of the constant table: known once %iv is a constant.
  %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
  %const_array_element = load i32, i32* %array_const_idx, align 4
  ; The casts under test: both operands are narrowed to i8 before use.
  %x = trunc i32 %src_element to i8
  %y = trunc i32 %const_array_element to i8
  %mul = mul nsw i8 %x, %y
  %add = add nsw i8 %mul, %r
  ; The loop exits once the incremented counter reaches 10.
  %inc = add nuw nsw i64 %iv, 1
  %exitcond86.i = icmp eq i64 %inc, 10
  br i1 %exitcond86.i, label %loop.end, label %loop

loop.end:                                         ; preds = %loop
  ; The exit phi forwards %r (the accumulator phi), not %add.
  %r.lcssa = phi i8 [ %r, %loop ]
  ret i8 %r.lcssa
}

; The same test as before, but with ZEXT instead of TRUNC: both loaded values
; are zero-extended to i64 and the accumulator is i64.
; CHECK-LABEL: @const_load_zext
; CHECK-NOT: br i1
; CHECK: ret i64 %
define i64 @const_load_zext(i32* noalias nocapture readonly %src) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  ; %iv counts the iterations; %r carries the running i64 accumulator.
  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
  %r = phi i64 [ 0, %entry ], [ %add, %loop ]
  ; Element of the caller-provided array: unknown at compile time.
  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
  %src_element = load i32, i32* %arrayidx, align 4
  ; Element of the constant table: known once %iv is a constant.
  %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
  %const_array_element = load i32, i32* %array_const_idx, align 4
  ; The casts under test: both operands are zero-extended to i64.
  %x = zext i32 %src_element to i64
  %y = zext i32 %const_array_element to i64
  %mul = mul nsw i64 %x, %y
  %add = add nsw i64 %mul, %r
  ; The loop exits once the incremented counter reaches 10.
  %inc = add nuw nsw i64 %iv, 1
  %exitcond86.i = icmp eq i64 %inc, 10
  br i1 %exitcond86.i, label %loop.end, label %loop

loop.end:                                         ; preds = %loop
  ; The exit phi forwards %r (the accumulator phi), not %add.
  %r.lcssa = phi i64 [ %r, %loop ]
  ret i64 %r.lcssa
}

; The same test as the first one, but with SEXT instead of TRUNC: both loaded
; values are sign-extended to i64 and the accumulator is i64.
; CHECK-LABEL: @const_load_sext
; CHECK-NOT: br i1
; CHECK: ret i64 %
define i64 @const_load_sext(i32* noalias nocapture readonly %src) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  ; %iv counts the iterations; %r carries the running i64 accumulator.
  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
  %r = phi i64 [ 0, %entry ], [ %add, %loop ]
  ; Element of the caller-provided array: unknown at compile time.
  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
  %src_element = load i32, i32* %arrayidx, align 4
  ; Element of the constant table: known once %iv is a constant.
  %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
  %const_array_element = load i32, i32* %array_const_idx, align 4
  ; The casts under test: both operands are sign-extended to i64.
  %x = sext i32 %src_element to i64
  %y = sext i32 %const_array_element to i64
  %mul = mul nsw i64 %x, %y
  %add = add nsw i64 %mul, %r
  ; The loop exits once the incremented counter reaches 10.
  %inc = add nuw nsw i64 %iv, 1
  %exitcond86.i = icmp eq i64 %inc, 10
  br i1 %exitcond86.i, label %loop.end, label %loop

loop.end:                                         ; preds = %loop
  ; The exit phi forwards %r (the accumulator phi), not %add.
  %r.lcssa = phi i64 [ %r, %loop ]
  ret i64 %r.lcssa
}