; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF

; Check that loop unroller doesn't exhaust HW prefetcher resources.
;
; The first command above leaves -enable-falkor-hwpf-unroll-fix untouched and
; is verified with the default prefix; the second passes
; -enable-falkor-hwpf-unroll-fix=0 and is verified with the NOHWPF prefix.

; Partial unroll 2 times for this loop on falkor instead of 4.
; The loop body holds two loads whose addresses advance with %iv, so the
; expectations below list four (getelementptr, load, getelementptr, load, add)
; groups under the NOHWPF prefix and two groups under the default prefix.
; The exit label is matched with a plain directive; it does not have to sit on
; the line immediately after the branch.
; NOHWPF-LABEL: @unroll1(
; NOHWPF-LABEL: loop:
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: icmp
; NOHWPF-NEXT: br
; NOHWPF: exit:
;
; CHECK-LABEL: @unroll1(
; CHECK-LABEL: loop:
; CHECK-NEXT: phi
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; CHECK-NEXT: br
; CHECK: exit:
define void @unroll1(i32* %p, i32* %p2) {
entry:
  br label %loop

loop:
  ; Induction variable: 0 when entered from %entry, %inc on the back edge.
  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]

  ; Load from %p indexed by %iv. The result is unused in this function; the
  ; load is volatile (presumably so that it is not deleted - confirm).
  %gep = getelementptr inbounds i32, i32* %p, i32 %iv
  %load = load volatile i32, i32* %gep

  ; Second load, from %p2 indexed by the same %iv; also volatile and unused.
  %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
  %load2 = load volatile i32, i32* %gep2

  ; Step the induction variable and leave the loop once it reaches 1024
  ; (unsigned comparison).
  %inc = add i32 %iv, 1
  %exitcnd = icmp uge i32 %inc, 1024
  br i1 %exitcnd, label %exit, label %loop

exit:
  ret void
}

; Partial unroll 4 times for this loop on falkor instead of 8.
; @unroll2 is a two-level loop nest. The expectations look only at the inner
; loop (loop2), whose body has one load indexed by the inner induction
; variable: eight (getelementptr, load, add, add) groups are listed under the
; NOHWPF prefix and four groups under the default prefix.
; The exit2 label is matched with a plain directive; it does not have to sit on
; the line immediately after the branch.
; NOHWPF-LABEL: @unroll2(
; NOHWPF-LABEL: loop2:
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: phi
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: getelementptr
; NOHWPF-NEXT: load
; NOHWPF-NEXT: add
; NOHWPF-NEXT: add
; NOHWPF-NEXT: icmp
; NOHWPF-NEXT: br
; NOHWPF: exit2:
;
; CHECK-LABEL: @unroll2(
; CHECK-LABEL: loop2:
; CHECK-NEXT: phi
; CHECK-NEXT: phi
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: getelementptr
; CHECK-NEXT: load
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; CHECK-NEXT: br
; CHECK: exit2:

define void @unroll2(i32* %p) {
entry:
  br label %loop1

loop1:
  ; Outer induction variable and the running sum carried across outer
  ; iterations.
  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
  %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
  br label %loop2.header

loop2.header:
  br label %loop2

loop2:
  ; Inner induction variable, restarted at 0 for every outer iteration, and
  ; the accumulator seeded from %outer.sum.
  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
  %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
  ; The only load in the nest: %p indexed by the inner induction variable.
  %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
  %load = load i32, i32* %gep
  %sum.inc = add i32 %sum, %load
  ; Step the inner induction variable; leave loop2 once it reaches 1024
  ; (unsigned comparison).
  %inc2 = add i32 %iv2, 1
  %exitcnd2 = icmp uge i32 %inc2, 1024
  br i1 %exitcnd2, label %exit2, label %loop2

exit2:
  br label %loop1.latch

loop1.latch:
  %inc1 = add i32 %iv1, 1
  %exitcnd1 = icmp uge i32 %inc1, 1024
  ; NOTE(review): this branch tests %exitcnd2 (the inner loop's exit
  ; condition), which leaves %exitcnd1 unused. It may have been meant to test
  ; %exitcnd1 - confirm before changing, since the expectations above were
  ; written against this exact input.
  br i1 %exitcnd2, label %exit, label %loop1

exit:
  ret void
}