1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
2; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s
3
4; SI-LABEL: {{^}}infinite_loop:
5; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
6; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
7; SI: s_waitcnt lgkmcnt(0)
8; SI: buffer_store_dword [[REG]]
9; SI: s_branch [[LOOP]]
; Kernel with no exit path: %entry jumps straight to %loop, and %loop's only
; successor is itself. The function contains no ret instruction.
10define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
11entry:
12  br label %loop
13
14loop:
; 999 is 0x3e7, the immediate the llc-side checks expect to be moved into a
; VGPR before the loop. The store is volatile and is re-executed on every trip.
15  store volatile i32 999, i32 addrspace(1)* %out, align 4
16  br label %loop
17}
18
19
20; IR-LABEL: @infinite_loop_ret(
21; IR:  br i1 %cond, label %loop, label %UnifiedReturnBlock
22
23; IR: loop:
24; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
25; IR: br i1 true, label %loop, label %UnifiedReturnBlock
26
27; IR: UnifiedReturnBlock:
28; IR:  ret void
29
30
31; SI-LABEL: {{^}}infinite_loop_ret:
32; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
33
34; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
35; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
36; SI: s_and_b64 vcc, exec, -1
37; SI: s_waitcnt lgkmcnt(0)
38; SI: buffer_store_dword [[REG]]
39; SI: s_cbranch_vccnz [[LOOP]]
40
41; SI: [[RET]]:  ; %UnifiedReturnBlock
42; SI: s_endpgm
; Kernel that either returns immediately or enters a loop that never exits.
; The choice depends on the value returned by @llvm.amdgcn.workitem.id.x, so
; the function has one returning path (%return) and one non-returning path
; (%loop). The opt-side checks expect %loop's back edge to be rewritten as a
; conditional branch on constant true whose other target is a
; UnifiedReturnBlock.
43define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
44entry:
45  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
; Only the case %tmp == 1 enters the infinite loop; every other value returns.
46  %cond = icmp eq i32 %tmp, 1
47  br i1 %cond, label %loop, label %return
48
49loop:
; 999 is 0x3e7, the immediate matched by the llc-side checks.
50  store volatile i32 999, i32 addrspace(1)* %out, align 4
51  br label %loop
52
53return:
54  ret void
55}
56
57
58; IR-LABEL: @infinite_loops(
59; IR: br i1 undef, label %loop1, label %loop2
60
61; IR: loop1:
62; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
63; IR: br i1 true, label %loop1, label %DummyReturnBlock
64
65; IR: loop2:
66; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4
67; IR: br i1 true, label %loop2, label %DummyReturnBlock
68
69; IR: DummyReturnBlock:
70; IR: ret void
71
72
73; SI-LABEL: {{^}}infinite_loops:
74
75; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7
76; SI: s_and_b64 vcc, exec, -1
77
78; SI: [[LOOP1:BB[0-9]+_[0-9]+]]:  ; %loop1
79; SI: s_waitcnt lgkmcnt(0)
80; SI: buffer_store_dword [[REG1]]
81; SI: s_cbranch_vccnz [[LOOP1]]
82; SI: s_branch [[RET:BB[0-9]+_[0-9]+]]
83
84; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378
85; SI: s_and_b64 vcc, exec, -1
86
87; SI: [[LOOP2:BB[0-9]+_[0-9]+]]:  ; %loop2
88; SI: s_waitcnt lgkmcnt(0)
89; SI: buffer_store_dword [[REG2]]
90; SI: s_cbranch_vccnz [[LOOP2]]
91
92; SI: [[RET]]:  ; %DummyReturnBlock
93; SI: s_endpgm
; Kernel with two separate infinite loops and no ret instruction at all.
; %entry branches on undef, so either %loop1 or %loop2 may be entered; each
; loop's only successor is itself. The opt-side checks expect both back edges
; to become conditional branches on constant true that also target a newly
; created DummyReturnBlock containing ret void.
94define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
95entry:
96  br i1 undef, label %loop1, label %loop2
97
98loop1:
; 999 is 0x3e7, the immediate matched for the first loop by the llc-side checks.
99  store volatile i32 999, i32 addrspace(1)* %out, align 4
100  br label %loop1
101
102loop2:
; 888 is 0x378, the immediate matched for the second loop by the llc-side checks.
103  store volatile i32 888, i32 addrspace(1)* %out, align 4
104  br label %loop2
105}
106
107
108
109; IR-LABEL: @infinite_loop_nest_ret(
110; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock
111
112; IR: outer_loop:
113; IR: br label %inner_loop
114
115; IR: inner_loop:
116; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
117; IR: %cond3 = icmp eq i32 %tmp, 3
118; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock
119
120; IR: TransitionBlock:
121; IR: br i1 %cond3, label %inner_loop, label %outer_loop
122
123; IR: UnifiedReturnBlock:
124; IR: ret void
125
126; SI-LABEL: {{^}}infinite_loop_nest_ret:
127; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
128
129; SI: s_mov_b32
130; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]:  ; %outer_loop
131
132; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]:  ; %inner_loop
133; SI: s_waitcnt expcnt(0)
134; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
135; SI: s_waitcnt lgkmcnt(0)
136; SI: buffer_store_dword [[REG]]
137
138; SI: s_andn2_b64 exec
139; SI: s_cbranch_execnz [[INNER_LOOP]]
140
141; SI: s_andn2_b64 exec
142; SI: s_cbranch_execnz [[OUTER_LOOP]]
143
144; SI: [[RET]]:  ; %UnifiedReturnBlock
145; SI: s_endpgm
; Kernel with a nested pair of loops that never exits once entered, plus a
; separate returning path. %entry goes to %outer_loop when %tmp == 1 and to
; %return otherwise. %outer_loop falls into %inner_loop, and %inner_loop
; branches either to itself or back to %outer_loop, so neither loop has an
; edge leading to %return. The opt-side checks expect %inner_loop to end in a
; branch on constant true to a TransitionBlock (which carries the original
; %cond3 branch), with UnifiedReturnBlock as the other target.
146define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
147entry:
148  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
149  %cond1 = icmp eq i32 %tmp, 1
150  br i1 %cond1, label %outer_loop, label %return
151
152outer_loop:
; NOTE(review): the next two lines are a disabled conditional self-branch for
; %outer_loop; only the unconditional branch below them is active.
153 ; %cond2 = icmp eq i32 %tmp, 2
154 ; br i1 %cond2, label %outer_loop, label %inner_loop
155 br label %inner_loop
156
157inner_loop:                                     ; preds = %outer_loop, %inner_loop
; 999 is 0x3e7, the immediate matched by the llc-side checks.
158  store volatile i32 999, i32 addrspace(1)* %out, align 4
; %tmp == 3 repeats the inner loop; any other value restarts the outer loop.
159  %cond3 = icmp eq i32 %tmp, 3
160  br i1 %cond3, label %inner_loop, label %outer_loop
161
162return:
163  ret void
164}
165
; Intrinsic declaration used by @infinite_loop_ret and @infinite_loop_nest_ret
; to produce %tmp, the value their entry branches are based on. Going by its
; name it presumably returns the work-item's X id (confirm in the AMDGPU docs).
166declare i32 @llvm.amdgcn.workitem.id.x()
167