1; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
2
3; Test that loops with different maximum offsets for different address
4; spaces are correctly handled.
5
; Data layout for the module. Per the pointer specs below, default (p) and
; addrspace(3) pointers are 32 bits wide while addrspace(1) pointers are 64 bits
; wide; this matches the i64 GEP indices expected for addrspace(1) and the i32
; GEP indices expected for addrspace(3) in the checks in this file.
6target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
7
8; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
9; OPT: {{^}}.lr.ph:
10; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
11; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
12; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
; Input loop: for each i starting at 0, arg0[i] += sext(arg1[i + 4095]), where
; both %arg0 and %arg1 are addrspace(1) pointers. The FileCheck lines directly
; above expect the constant 4095 to remain on a GEP inside the loop (off the
; pointer phi that starts at %arg1) feeding the i8 load.
; NOTE(review): going by the function name, 4095 is presumably the largest
; offset foldable into an addrspace(1) access on this target -- confirm.
13define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
14bb:
  ; Skip the loop entirely unless %n is (signed) greater than zero.
15  %tmp = icmp sgt i32 %n, 0
16  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
17
18.lr.ph.preheader:                                 ; preds = %bb
19  br label %.lr.ph
20
21._crit_edge.loopexit:                             ; preds = %.lr.ph
22  br label %._crit_edge
23
24._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
25  ret void
26
27.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  ; 64-bit induction variable counting up from 0 in steps of 1.
28  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  ; Byte load from %arg1 at index iv + 4095; this constant is what the test probes.
29  %tmp1 = add nuw nsw i64 %indvars.iv, 4095
30  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
31  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
32  %tmp4 = sext i8 %tmp3 to i32
  ; Read-modify-write of the i32 element of %arg0 at index iv.
33  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
34  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
35  %tmp7 = add nsw i32 %tmp6, %tmp4
36  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  ; Advance the counter; exit once its low 32 bits equal %n.
37  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
38  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
39  %exitcond = icmp eq i32 %lftr.wideiv, %n
40  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
41}
42
43; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
44; OPT: {{^}}.lr.ph.preheader:
45; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
46; OPT: br label %.lr.ph
47
48; OPT: {{^}}.lr.ph:
49; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
50; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
; Input loop: for each i starting at 0, arg0[i] += sext(arg1[i + 4096]), where
; both %arg0 and %arg1 are addrspace(1) pointers. Identical to the 4095 variant
; except for the constant. The FileCheck lines directly above expect the 4096
; offset to be applied to %arg1 once in the preheader, with the in-loop pointer
; phi starting from that value and advancing by 1 per iteration.
; NOTE(review): "p1" in the name presumably means "max offset plus one" -- confirm.
51define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
52bb:
  ; Skip the loop entirely unless %n is (signed) greater than zero.
53  %tmp = icmp sgt i32 %n, 0
54  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
55
56.lr.ph.preheader:                                 ; preds = %bb
57  br label %.lr.ph
58
59._crit_edge.loopexit:                             ; preds = %.lr.ph
60  br label %._crit_edge
61
62._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
63  ret void
64
65.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  ; 64-bit induction variable counting up from 0 in steps of 1.
66  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  ; Byte load from %arg1 at index iv + 4096; this constant is what the test probes.
67  %tmp1 = add nuw nsw i64 %indvars.iv, 4096
68  %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
69  %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
70  %tmp4 = sext i8 %tmp3 to i32
  ; Read-modify-write of the i32 element of %arg0 at index iv.
71  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
72  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
73  %tmp7 = add nsw i32 %tmp6, %tmp4
74  store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
  ; Advance the counter; exit once its low 32 bits equal %n.
75  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
76  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
77  %exitcond = icmp eq i32 %lftr.wideiv, %n
78  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
79}
80
81; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
82; OPT: {{^}}.lr.ph:
83; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
84; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
85; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
; Input loop: for each i starting at 0, arg0[i] += sext(arg1[i + 65535]), where
; %arg1 is an addrspace(3) pointer and %arg0 is an addrspace(1) pointer. The
; FileCheck lines directly above expect the constant 65535 to remain on a GEP
; inside the loop (off the pointer phi that starts at %arg1) feeding the load
; named %tmp4.
; NOTE(review): going by the function name, 65535 is presumably the largest
; offset foldable into an addrspace(3) access on this target -- confirm.
86define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
87bb:
  ; Skip the loop entirely unless %n is (signed) greater than zero.
88  %tmp = icmp sgt i32 %n, 0
89  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
90
91.lr.ph.preheader:                                 ; preds = %bb
92  br label %.lr.ph
93
94._crit_edge.loopexit:                             ; preds = %.lr.ph
95  br label %._crit_edge
96
97._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
98  ret void
99
100.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  ; 64-bit induction variable counting up from 0 in steps of 1.
101  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  ; Index iv + 65535 is computed in i64, then truncated to i32 before being used
  ; to address the addrspace(3) pointer.
102  %tmp1 = add nuw nsw i64 %indvars.iv, 65535
103  %tmp2 = trunc i64 %tmp1 to i32
104  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
105  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
106  %tmp5 = sext i8 %tmp4 to i32
  ; Read-modify-write of the i32 element of %arg0 at index iv.
107  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
108  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
109  %tmp8 = add nsw i32 %tmp7, %tmp5
110  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  ; Advance the counter; exit once its low 32 bits equal %n.
111  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
112  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
113  %exitcond = icmp eq i32 %lftr.wideiv, %n
114  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
115}
116
117; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
118; OPT: {{^}}.lr.ph.preheader:
119; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
120; OPT: br label %.lr.ph
121
122; OPT: {{^}}.lr.ph:
123; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
124; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
; Input loop: for each i starting at 0, arg0[i] += sext(arg1[i + 65536]), where
; %arg1 is an addrspace(3) pointer and %arg0 is an addrspace(1) pointer.
; Identical to the 65535 variant except for the constant. The FileCheck lines
; directly above expect the 65536 offset to be applied to %arg1 once in the
; preheader, with the in-loop pointer phi starting from that value and
; advancing by 1 per iteration.
; NOTE(review): "p1" in the name presumably means "max offset plus one" -- confirm.
125define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
126bb:
  ; Skip the loop entirely unless %n is (signed) greater than zero.
127  %tmp = icmp sgt i32 %n, 0
128  br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
129
130.lr.ph.preheader:                                 ; preds = %bb
131  br label %.lr.ph
132
133._crit_edge.loopexit:                             ; preds = %.lr.ph
134  br label %._crit_edge
135
136._crit_edge:                                      ; preds = %._crit_edge.loopexit, %bb
137  ret void
138
139.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  ; 64-bit induction variable counting up from 0 in steps of 1.
140  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  ; Index iv + 65536 is computed in i64, then truncated to i32 before being used
  ; to address the addrspace(3) pointer.
141  %tmp1 = add nuw nsw i64 %indvars.iv, 65536
142  %tmp2 = trunc i64 %tmp1 to i32
143  %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
144  %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
145  %tmp5 = sext i8 %tmp4 to i32
  ; Read-modify-write of the i32 element of %arg0 at index iv.
146  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
147  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
148  %tmp8 = add nsw i32 %tmp7, %tmp5
149  store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
  ; Advance the counter; exit once its low 32 bits equal %n.
150  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
151  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
152  %exitcond = icmp eq i32 %lftr.wideiv, %n
153  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
154}
155
; Attribute group #0, attached to every function definition in this file.
; NOTE(review): "target-cpu" here is "hawaii" while the opt invocation on the
; first line passes -mcpu=bonaire -- confirm the mismatch is intentional.
156attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }
157