; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s

; Test that LSR gives small constants, which get re-associated as an unfolded
; offset, a chance to be combined with loop-invariant registers (the same
; treatment large constants get when they do not fit as add-immediate
; operands). In these tests LSR prefers to bump the base pointer once, outside
; the loop.
;
; The "+ 7" element offset on a float array is 7 * 4 = 28 bytes, so the
; expected output adds #28 to the base pointer in the loop preheader and the
; loop body loads through [base, i, lsl #2].

; float test(float *arr, long long start, float threshold) {
;   for (long long i = start; i != 0; ++i) {
;     float x = arr[i + 7];
;     if (x > threshold)
;       return x;
;   }
;   return -7;
; }
define float @test1(float* nocapture readonly %arr, i64 %start, float %threshold) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cbz x1, .LBB0_4
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    add x8, x0, #28 // =28
; CHECK-NEXT:  .LBB0_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr s1, [x8, x1, lsl #2]
; CHECK-NEXT:    fcmp s1, s0
; CHECK-NEXT:    b.gt .LBB0_5
; CHECK-NEXT:  // %bb.3: // %for.cond
; CHECK-NEXT:    // in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    add x1, x1, #1 // =1
; CHECK-NEXT:    cbnz x1, .LBB0_2
; CHECK-NEXT:  .LBB0_4:
; CHECK-NEXT:    fmov s0, #-7.00000000
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_5: // %cleanup2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %cmp11 = icmp eq i64 %start, 0
  br i1 %cmp11, label %cleanup2, label %for.body

for.cond:                                         ; preds = %for.body
  %cmp = icmp eq i64 %inc, 0
  br i1 %cmp, label %cleanup2, label %for.body

for.body:                                         ; preds = %entry, %for.cond
  %i.012 = phi i64 [ %inc, %for.cond ], [ %start, %entry ]
  %add = add nsw i64 %i.012, 7
  %arrayidx = getelementptr inbounds float, float* %arr, i64 %add
  %0 = load float, float* %arrayidx, align 4
  %cmp1 = fcmp ogt float %0, %threshold
  %inc = add nsw i64 %i.012, 1
  br i1 %cmp1, label %cleanup2, label %for.cond

cleanup2:                                         ; preds = %for.cond, %for.body, %entry
  %1 = phi float [ -7.000000e+00, %entry ], [ %0, %for.body ], [ -7.000000e+00, %for.cond ]
  ret float %1
}

; Same as test1, except i has another use:
;     if (x > threshold) ---> if (x > threshold + i)
define float @test2(float* nocapture readonly %arr, i64 %start, float %threshold) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cbz x1, .LBB1_4
; CHECK-NEXT:  // %bb.1: // %for.body.preheader
; CHECK-NEXT:    add x8, x0, #28 // =28
; CHECK-NEXT:  .LBB1_2: // %for.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldr s1, [x8, x1, lsl #2]
; CHECK-NEXT:    scvtf s2, x1
; CHECK-NEXT:    fadd s2, s2, s0
; CHECK-NEXT:    fcmp s1, s2
; CHECK-NEXT:    b.gt .LBB1_5
; CHECK-NEXT:  // %bb.3: // %for.cond
; CHECK-NEXT:    // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT:    add x1, x1, #1 // =1
; CHECK-NEXT:    cbnz x1, .LBB1_2
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    fmov s0, #-7.00000000
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_5: // %cleanup4
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %cmp14 = icmp eq i64 %start, 0
  br i1 %cmp14, label %cleanup4, label %for.body

for.cond:                                         ; preds = %for.body
  %cmp = icmp eq i64 %inc, 0
  br i1 %cmp, label %cleanup4, label %for.body

for.body:                                         ; preds = %entry, %for.cond
  %i.015 = phi i64 [ %inc, %for.cond ], [ %start, %entry ]
  %add = add nsw i64 %i.015, 7
  %arrayidx = getelementptr inbounds float, float* %arr, i64 %add
  %0 = load float, float* %arrayidx, align 4
  %conv = sitofp i64 %i.015 to float
  %add1 = fadd float %conv, %threshold
  %cmp2 = fcmp ogt float %0, %add1
  %inc = add nsw i64 %i.015, 1
  br i1 %cmp2, label %cleanup4, label %for.cond

cleanup4:                                         ; preds = %for.cond, %for.body, %entry
  %1 = phi float [ -7.000000e+00, %entry ], [ %0, %for.body ], [ -7.000000e+00, %for.cond ]
  ret float %1
}