; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

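; Tests for the lowering of branches on uniform (scalar) conditions, plus
; cases where a compare must be moved to the VALU because its operands are
; divergent.
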
; GCN-LABEL: {{^}}uniform_if_scc:
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_vcc:
; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}uniform_if_else_ret:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
; GCN: s_endpgm

; GCN: {{^}}[[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

; GCN-LABEL: {{^}}uniform_if_else:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]

; GCN: [[IF_LABEL]]:
; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1

; GCN-NEXT: [[ENDIF_LABEL]]:
; GCN: buffer_store_dword [[IMM_REG]]

; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN: buffer_store_dword [[THREE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  store i32 3, i32 addrspace(1)* %out1
  ret void
}

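; The compare has two users, the uniform branch and the sext; this should
; still compile to a scalar compare plus an scc branch.
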
; GCN-LABEL: {{^}}icmp_2_users:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
; GCN: buffer_store_dword
; GCN: [[LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}

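; Like icmp_2_users, but the second compare is defined in the entry block and
; its users are in a different block.
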
; GCN-LABEL: {{^}}icmp_users_different_blocks:
; GCN: s_load_dwordx2 s{{\[}}[[COND0:[0-9]+]]:[[COND1:[0-9]+]]{{\]}}
; GCN: s_cmp_lt_i32 s[[COND0]], 1
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; GCN: v_cmp_gt_i32_e64 {{[^,]*}}, s[[COND1]], 0{{$}}
; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
; GCN: {{^}}[[BODY]]:
; GCN: buffer_store
; GCN: s_endpgm
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb, %bb2, %bb7
  ret void
}

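; A uniform loop: the counter update, compare, and backedge branch should all
; stay on the SALU.
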
; SI-LABEL: {{^}}uniform_loop:
; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; SI: s_add_i32 [[I:s[0-9]+]], s{{[0-9]+}}, -1
; SI: s_cmp_lg_u32 [[I]], 0
; SI: s_cbranch_scc1 [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test mixed uniform and divergent control flow.

; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
; GCN: s_endpgm
; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
define amdgpu_kernel void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}divergent_inside_uniform:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: ; mask branch [[ENDIF_LABEL]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

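; A divergent if followed by a uniform if: the uniform compare and scc branch
; should only be emitted after exec is restored.
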
; GCN-LABEL: {{^}}divergent_if_uniform_if:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_or_b64 exec, exec, [[MASK]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
; GCN: s_endpgm
; GCN: [[IF_UNIFORM]]:
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
define amdgpu_kernel void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are uniform. MachineCSE
; replaces the second condition with the inverse of the first, leaving an scc
; use in a different block than the one where it was defined.

; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
; GCN: s_load_dword [[COND:s[0-9]+]]
; GCN: s_cmp_lt_i32 [[COND]], 1
; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3

; GCN: %bb.1:
; GCN-NOT: cmp
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: s_cbranch_scc1 BB[[FNNUM]]_3

; GCN: BB[[FNNUM]]_3:
; GCN: s_endpgm
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb, %bb2, %bb7
  ret void
}

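; VI has s_cmp_eq_u64, so the uniform 64-bit equality compare can stay on the
; SALU; SI does not have it and must use a VALU compare instead.
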
; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; SI-DAG: v_cmp_eq_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0

; SI-DAG: v_cmp_ne_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

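; There is no scalar 64-bit signed compare on either target, so both must use
; a VALU compare and a vcc branch.
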
; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN-DAG: v_cmp_gt_i64_e64
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

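; Here the condition is loaded from LDS, so it is divergent and the 64-bit
; compare must be selected on the VALU.
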
; GCN-LABEL: {{^}}move_to_valu_i64_eq:
; GCN: v_cmp_eq_u64_e32
define amdgpu_kernel void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_i64_ne:
; GCN: v_cmp_ne_u64_e32
define amdgpu_kernel void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

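; The phi has a VGPR operand, so the add feeding the address of the LDS store
; must be moved to the VALU.
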
; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
; GCN: v_add_{{[iu]}}32_e32
; GCN: ds_write_b32
define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
bb0:
  br label %bb1

bb1:                                              ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  store volatile i32 1, i32 addrspace(3)* %tmp2, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }