1; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
2; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
3
4; Check that loop unroller doesn't exhaust HW prefetcher resources.
5
6; Partial unroll 2 times for this loop on falkor instead of 4.
7; NOHWPF-LABEL: @unroll1(
8; NOHWPF-LABEL: loop:
9; NOHWPF-NEXT: phi
10; NOHWPF-NEXT: getelementptr
11; NOHWPF-NEXT: load
12; NOHWPF-NEXT: getelementptr
13; NOHWPF-NEXT: load
14; NOHWPF-NEXT: add
15; NOHWPF-NEXT: getelementptr
16; NOHWPF-NEXT: load
17; NOHWPF-NEXT: getelementptr
18; NOHWPF-NEXT: load
19; NOHWPF-NEXT: add
20; NOHWPF-NEXT: getelementptr
21; NOHWPF-NEXT: load
22; NOHWPF-NEXT: getelementptr
23; NOHWPF-NEXT: load
24; NOHWPF-NEXT: add
25; NOHWPF-NEXT: getelementptr
26; NOHWPF-NEXT: load
27; NOHWPF-NEXT: getelementptr
28; NOHWPF-NEXT: load
29; NOHWPF-NEXT: add
30; NOHWPF-NEXT: icmp
31; NOHWPF-NEXT: br
; NOHWPF-LABEL: exit:
33;
34; CHECK-LABEL: @unroll1(
35; CHECK-LABEL: loop:
36; CHECK-NEXT: phi
37; CHECK-NEXT: getelementptr
38; CHECK-NEXT: load
39; CHECK-NEXT: getelementptr
40; CHECK-NEXT: load
41; CHECK-NEXT: add
42; CHECK-NEXT: getelementptr
43; CHECK-NEXT: load
44; CHECK-NEXT: getelementptr
45; CHECK-NEXT: load
46; CHECK-NEXT: add
47; CHECK-NEXT: icmp
48; CHECK-NEXT: br
; CHECK-LABEL: exit:
; Single counted loop: the index starts at 0, is incremented by 1 each trip,
; and the loop is left once the incremented index is >= 1024 (unsigned), so
; the body executes 1024 times. Every trip issues two volatile i32 loads, one
; from %p and one from %p2, both addressed by the current index. The loaded
; values are never used and the function returns nothing.
define void @unroll1(i32* %p, i32* %p2) {
entry:
  br label %loop

loop:
  ; Induction variable: 0 when arriving from %entry, %idx.next on the back edge.
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
  ; First load stream: element %idx of %p.
  %addr.a = getelementptr inbounds i32, i32* %p, i32 %idx
  %val.a = load volatile i32, i32* %addr.a
  ; Second load stream: element %idx of %p2.
  %addr.b = getelementptr inbounds i32, i32* %p2, i32 %idx
  %val.b = load volatile i32, i32* %addr.b
  ; Step the index and test the exit condition on the stepped value.
  %idx.next = add i32 %idx, 1
  %done = icmp uge i32 %idx.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
70
71; Partial unroll 4 times for this loop on falkor instead of 8.
72; NOHWPF-LABEL: @unroll2(
73; NOHWPF-LABEL: loop2:
74; NOHWPF-NEXT: phi
75; NOHWPF-NEXT: phi
76; NOHWPF-NEXT: getelementptr
77; NOHWPF-NEXT: load
78; NOHWPF-NEXT: add
79; NOHWPF-NEXT: add
80; NOHWPF-NEXT: getelementptr
81; NOHWPF-NEXT: load
82; NOHWPF-NEXT: add
83; NOHWPF-NEXT: add
84; NOHWPF-NEXT: getelementptr
85; NOHWPF-NEXT: load
86; NOHWPF-NEXT: add
87; NOHWPF-NEXT: add
88; NOHWPF-NEXT: getelementptr
89; NOHWPF-NEXT: load
90; NOHWPF-NEXT: add
91; NOHWPF-NEXT: add
92; NOHWPF-NEXT: getelementptr
93; NOHWPF-NEXT: load
94; NOHWPF-NEXT: add
95; NOHWPF-NEXT: add
96; NOHWPF-NEXT: getelementptr
97; NOHWPF-NEXT: load
98; NOHWPF-NEXT: add
99; NOHWPF-NEXT: add
100; NOHWPF-NEXT: getelementptr
101; NOHWPF-NEXT: load
102; NOHWPF-NEXT: add
103; NOHWPF-NEXT: add
104; NOHWPF-NEXT: getelementptr
105; NOHWPF-NEXT: load
106; NOHWPF-NEXT: add
107; NOHWPF-NEXT: add
108; NOHWPF-NEXT: icmp
109; NOHWPF-NEXT: br
; NOHWPF-LABEL: exit2:
111;
112; CHECK-LABEL: @unroll2(
113; CHECK-LABEL: loop2:
114; CHECK-NEXT: phi
115; CHECK-NEXT: phi
116; CHECK-NEXT: getelementptr
117; CHECK-NEXT: load
118; CHECK-NEXT: add
119; CHECK-NEXT: add
120; CHECK-NEXT: getelementptr
121; CHECK-NEXT: load
122; CHECK-NEXT: add
123; CHECK-NEXT: add
124; CHECK-NEXT: getelementptr
125; CHECK-NEXT: load
126; CHECK-NEXT: add
127; CHECK-NEXT: add
128; CHECK-NEXT: getelementptr
129; CHECK-NEXT: load
130; CHECK-NEXT: add
131; CHECK-NEXT: add
132; CHECK-NEXT: icmp
133; CHECK-NEXT: br
; CHECK-LABEL: exit2:
135
; Two-level loop nest.
;
; Inner loop (%loop2): the index %iv2 starts at 0 and is stepped by 1; the
; loop is left once the stepped index is >= 1024 (unsigned). Each trip loads
; element %iv2 of %p (a plain, non-volatile i32 load) and adds it to a
; running sum.
;
; Outer loop (%loop1 .. %loop1.latch): the counter %iv1 follows the same
; 0 / +1 / ">= 1024" pattern and re-enters the inner loop on every trip. The
; running sum is carried across outer trips through %outer.sum; it is not
; used after the nest and the function returns nothing.
define void @unroll2(i32* %p) {
entry:
  br label %loop1

loop1:
  ; Outer counter and the sum carried in from the previous outer trip.
  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
  %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
  br label %loop2.header

loop2.header:
  ; Dedicated preheader for the inner loop.
  br label %loop2

loop2:
  ; Inner counter restarts at 0 on every outer trip; the sum continues from
  ; %outer.sum.
  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
  %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
  %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
  %load = load i32, i32* %gep
  %sum.inc = add i32 %sum, %load
  %inc2 = add i32 %iv2, 1
  %exitcnd2 = icmp uge i32 %inc2, 1024
  br i1 %exitcnd2, label %exit2, label %loop2

exit2:
  br label %loop1.latch

loop1.latch:
  %inc1 = add i32 %iv1, 1
  %exitcnd1 = icmp uge i32 %inc1, 1024
  ; The outer back edge is governed by the outer counter's own comparison.
  ; Branching on %exitcnd2 here would leave %exitcnd1 dead and, because
  ; %exitcnd2 is always true once the inner loop has been left, would make
  ; the outer loop run exactly once.
  br i1 %exitcnd1, label %exit, label %loop1

exit:
  ret void
}
169
170