; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

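; Test -amdgpu-remove-redundant-endcf: an s_or_b64 exec restore that is
; redundant because a nested control-flow region ends at the same point
; as its parent should be removed.

; Both the inner and the outer if end at %bb.outer.end, so both
; s_cbranch_execz branch to the same [[ENDIF]] block and a single exec
; restore with the outer saved mask is enough.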
; GCN-LABEL: {{^}}simple_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN:      s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

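; The inner if ends at %bb.inner.end, before the outer region does, and
; another store executes between the two endpoints, so both exec
; restores have to be kept.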
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

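; An if/else nested inside an if. The else region runs to the end of the
; outer if, so its closing exec restore is folded into [[ENDIF_OUTER]]
; and only the outer saved mask is restored there.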
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                       ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                             ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                             ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
  store i32 3, i32 addrspace(3)* null
  ret void
}

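; An inner if in both the then and the else arm of an outer if/else.
; Neither inner region extends to the end of the outer construct, so the
; restores of [[SAVEEXEC_INNER_IF_OUTER_ELSE]] and [[SAVEEXEC_ELSE]] are
; kept in addition to the final outer restore.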
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
; GCN:      s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN:      ds_write_b32
; GCN:      s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  store i32 3, i32 addrspace(3)* null
  ret void
}

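; The exec restore before the convergent s_barrier must not be removed,
; even though only s_endpgm follows the barrier.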
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN:      s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

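; A loop nest in which SCC is live across the exec-mask updates: one of
; the s_or_b64 exec restores is redundant and must still be removed (see
; the GCN-NOT line) without disturbing the surrounding mask handling.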
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
;
; GCN: [[BB1_INNER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen

; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #2 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }