; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
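; Both exits of the loop are funneled through a single Flow block by the
; structurizer; si-annotate-control-flow then accumulates the exit mask
; with llvm.amdgcn.if.break, tests it with llvm.amdgcn.loop, and
; reconverges at bb9 with llvm.amdgcn.end.cf, as the OPT checks below show.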
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

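; The break condition phi carries undef on the path that bypasses bb4; the
; annotator routes that phi through llvm.amdgcn.if.break unchanged.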
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT:    ; implicit-def: $sgpr4
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s4, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s4, s4, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: A ConstantExpr compare of the address against null folds away, so
; the phi below compares against an inttoptr constant instead.
@lds = addrspace(3) global i32 undef

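; The incoming value from bb1 is a ConstantExpr icmp that survives into the
; Flow block phi; the backend materializes it inside the loop as a
; v_cmp_ne_u32 against lds@abs32@lo (see the GCN checks below).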
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, lds@abs32@lo
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr3
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_ne_u32_e64 s[8:9], s2, 4
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s3, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[4:7], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s3, s3, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

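; With a constant true break condition on the bb1 path, no compare is
; needed there: the accumulated break mask is simply or'd with exec in the
; loop header (the s_or_b64 in the GCN checks below).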
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

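; With a constant false break condition on the bb1 path, the bits for the
; active lanes are instead cleared from the accumulated mask (the
; s_andn2_b64 of exec in the loop header of the GCN checks below).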
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branch targets in the flow block so that a true
; phi value means continue rather than break.

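; To preserve the break semantics, the annotator inserts an xor that
; inverts the phi before feeding it to llvm.amdgcn.if.break (visible as
; the s_xor_b64 in the GCN output below).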
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }