1; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -loop-unroll < %s | FileCheck %s
2
3; Test that the target-specific maximum number of iterations to analyze
4; is large enough for the inner loop to be completely unrolled.
5; CHECK-LABEL: foo
; @foo is the loop nest handed to the unroller. It contains three nested
; loops: an outer loop (%bb2 / %bb7), a middle loop (%bb4 / %bb10) and an
; innermost loop (%for.body). Every induction variable starts at 0 and is
; compared against a constant.
;   %ptrB    - addrspace(5) float pointer, only loaded from
;   %ptrC    - addrspace(5) float pointer, loaded from and stored to
;   %A, %A2  - i32 offsets added to the scaled innermost index to form the
;              element indices into %ptrC and %ptrB respectively
;   %M       - float factor multiplied with the value loaded from %ptrB
; The function returns nothing; its only visible effect is the stores
; through %ptrC.
6define void @foo(float addrspace(5)* %ptrB, float addrspace(5)* %ptrC, i32 %A, i32 %A2, float %M) {
7bb:
8  br label %bb2
9
; Outer loop header: %i starts at 0 and is advanced in %bb7.
10bb2:                                              ; preds = %bb7, %bb
11  %i = phi i32 [ 0, %bb ], [ %i8, %bb7 ]
12  br label %bb4
13
; Function exit, reached once the outer loop is done.
14bb3:                                              ; preds = %bb7
15  ret void
16
; Middle loop header: %i5 is reset to 0 on every entry from %bb2.
17bb4:                                              ; preds = %bb10, %bb2
18  %i5 = phi i32 [ 0, %bb2 ], [ %i11, %bb10 ]
  ; %i6 has no users anywhere in this function.
19  %i6 = add nuw nsw i32 %i5, %i
20  br label %for.body
21
; Outer loop latch: increment %i and leave the nest when it reaches 8,
; i.e. the outer loop runs for %i = 0..7.
22bb7:                                             ; preds = %bb10
23  %i8 = add nuw nsw i32 %i, 1
24  %i9 = icmp eq i32 %i8, 8
25  br i1 %i9, label %bb3, label %bb2
26
; Middle loop latch: increment %i5, then exit to %bb7 when %i11 < 8
; (unsigned) and branch back to %bb4 otherwise.
; NOTE(review): since %i5 enters as 0, %i11 is 1 on the first pass and the
; exit edge is taken immediately - confirm this polarity is intended for
; the reduced test.
27bb10:                                             ; preds = %for.body
28  %i11 = add nuw nsw i32 %i5, 1
29  %cmpj = icmp ult i32 %i11, 8
30  br i1 %cmpj, label %bb7, label %bb4
31
; The two FileCheck directives below require that no "%phi = phi" line
; follows the for.body label in the output, i.e. that the innermost
; loop's induction phi has been eliminated.
32; CHECK-LABEL: for.body
33; CHECK-NOT: %phi = phi {{.*}}
; Innermost loop: %phi starts at 0 and the back edge is taken while the
; pre-increment value %phi is < 31 (unsigned), so the body executes for
; %phi = 0..31.
34for.body:                                       ; preds = %bb4, %for.body
35  %phi = phi i32 [ 0, %bb4 ], [ %inc, %for.body ]
  ; Element index into %ptrC: %A + %phi * 64.
36  %mul = shl nuw nsw i32 %phi, 6
37  %add = add i32 %A, %mul
38  %arrayidx = getelementptr inbounds float, float addrspace(5)* %ptrC, i32 %add
39  %ld1 = load float, float addrspace(5)* %arrayidx, align 4
  ; Element index into %ptrB: %A2 + %phi * 8.
40  %mul2 = shl nuw nsw i32 %phi, 3
41  %add2 = add i32 %A2, %mul2
42  %arrayidx2 = getelementptr inbounds float, float addrspace(5)* %ptrB, i32 %add2
43  %ld2 = load float, float addrspace(5)* %arrayidx2, align 4
  ; ptrC[%add] = ptrC[%add] + %M * ptrB[%add2]
44  %mul3 = fmul contract float %M, %ld2
45  %add3 = fadd contract float %ld1, %mul3
46  store float %add3, float addrspace(5)* %arrayidx, align 4
  ; Second update, 2048 elements further into %ptrC; it reuses the value
  ; %ld2 already loaded from %ptrB:
  ; ptrC[%add + 2048] = ptrC[%add + 2048] + ptrB[%add2] * %M
47  %add1 = add nuw nsw i32 %add, 2048
48  %arrayidx3 = getelementptr inbounds float, float addrspace(5)* %ptrC, i32 %add1
49  %ld3 = load float, float addrspace(5)* %arrayidx3, align 4
50  %mul4 = fmul contract float %ld2, %M
51  %add4 = fadd contract float %ld3, %mul4
52  store float %add4, float addrspace(5)* %arrayidx3, align 4
  ; Advance the induction variable; the exit test uses %phi, not %inc.
53  %inc = add nuw nsw i32 %phi, 1
54  %cmpi = icmp ult i32 %phi, 31
55  br i1 %cmpi, label %for.body, label %bb10
56}
57