; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

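; A nested if with nothing between the inner and outer endif: the inner
; exec-mask restore is redundant, so no s_or_b64 is expected before
; s_endpgm and the inner guard folds to a plain s_and_b64 of exec with vcc.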
; GCN-LABEL: {{^}}simple_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN:      s_and_b64 exec, exec, vcc
; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ret void
}

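; Here the store in bb.inner.end executes between the inner and outer endif,
; so the inner restore (s_or_b64 exec, exec, SAVEEXEC_INNER) must remain.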
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  ret void
}

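; An if/else nested inside an if. The else arm goes through the usual
; s_or_saveexec/s_xor sequence, and its mask branch targets the outer
; endif directly, so no separate inner endif restore is expected.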
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                       ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                             ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                             ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
  ret void
}

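; An outer if/else with a nested if in each arm. The inner if of the else
; arm needs its own flow block and restore (THEN_OUTER_FLOW), while the
; inner if of the then arm shares the outer endif, so its restore folds away.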
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  ret void
}

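; The exec restore cannot be folded into s_endpgm here: the convergent
; s_barrier must still execute, so s_or_b64 exec, exec, SAVEEXEC remains.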
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN:      s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; Make sure scc liveness is updated if s_or_b64 is removed
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }