1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
3
4; Make sure the branch targets are correct after lowering llvm.amdgcn.if
5
; If/endif diamond whose condition is computed from the i32 argument.
; In this variant the IR lists %if.true before %endif. The expected code
; skips the load with s_cbranch_execz to the %endif label and restores exec
; there with s_or_b64.
6define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
7; CHECK-LABEL: divergent_if_swap_brtarget_order0:
8; CHECK:       ; %bb.0: ; %entry
9; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
11; CHECK-NEXT:    ; implicit-def: $vgpr0
12; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
13; CHECK-NEXT:    s_cbranch_execz BB0_2
14; CHECK-NEXT:  ; %bb.1: ; %if.true
15; CHECK-NEXT:    global_load_dword v0, v[0:1], off
16; CHECK-NEXT:  BB0_2: ; %endif
17; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
18; CHECK-NEXT:    s_waitcnt vmcnt(0)
19; CHECK-NEXT:    s_setpc_b64 s[30:31]
20entry:
21  %c = icmp ne i32 %value, 0
22  br i1 %c, label %if.true, label %endif
23
24if.true:
; Volatile load from an undef address; presumably only there so the block has
; a side effect that cannot be removed (NOTE(review): confirm).
25  %val = load volatile i32, i32 addrspace(1)* undef
26  br label %endif
27
28endif:
; The %entry edge contributes undef, so only the loaded value is meaningful.
29  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
30  ret i32 %v
31}
32
; Same if/endif diamond as @divergent_if_swap_brtarget_order0, but the IR
; places %endif before %if.true. The expected code is the same apart from the
; label numbers: %if.true still falls through from the entry block and
; s_cbranch_execz still targets %endif.
33define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
34; CHECK-LABEL: divergent_if_swap_brtarget_order1:
35; CHECK:       ; %bb.0: ; %entry
36; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
38; CHECK-NEXT:    ; implicit-def: $vgpr0
39; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
40; CHECK-NEXT:    s_cbranch_execz BB1_2
41; CHECK-NEXT:  ; %bb.1: ; %if.true
42; CHECK-NEXT:    global_load_dword v0, v[0:1], off
43; CHECK-NEXT:  BB1_2: ; %endif
44; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
45; CHECK-NEXT:    s_waitcnt vmcnt(0)
46; CHECK-NEXT:    s_setpc_b64 s[30:31]
47entry:
48  %c = icmp ne i32 %value, 0
49  br i1 %c, label %if.true, label %endif
50
; The join block is deliberately listed before the conditional block here;
; %val is defined further down in %if.true.
51endif:
52  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
53  ret i32 %v
54
55if.true:
; Volatile load from an undef address; presumably only there so the block has
; a side effect that cannot be removed (NOTE(review): confirm).
56  %val = load volatile i32, i32 addrspace(1)* undef
57  br label %endif
58}
59
60; Make sure an 'and' with 1 is inserted on the condition when lowering llvm.amdgcn.if
; The branch condition is an i32 argument truncated to i1, so only bit 0 of
; the source value is significant. The expected code masks the value with
; v_and_b32 (and with 1) before comparing it against zero and saving exec.
61define i32 @divergent_if_nonboolean_condition0(i32 %value) {
62; CHECK-LABEL: divergent_if_nonboolean_condition0:
63; CHECK:       ; %bb.0: ; %entry
64; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
66; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
67; CHECK-NEXT:    ; implicit-def: $vgpr0
68; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
69; CHECK-NEXT:    s_cbranch_execz BB2_2
70; CHECK-NEXT:  ; %bb.1: ; %if.true
71; CHECK-NEXT:    global_load_dword v0, v[0:1], off
72; CHECK-NEXT:  BB2_2: ; %endif
73; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
74; CHECK-NEXT:    s_waitcnt vmcnt(0)
75; CHECK-NEXT:    s_setpc_b64 s[30:31]
76entry:
; trunc keeps only the low bit; the upper 31 bits of %value are arbitrary.
77  %c = trunc i32 %value to i1
78  br i1 %c, label %if.true, label %endif
79
80if.true:
81  %val = load volatile i32, i32 addrspace(1)* undef
82  br label %endif
83
84endif:
; The %entry edge contributes undef, so only the loaded value is meaningful.
85  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
86  ret i32 %v
87}
88
89; Make sure an 'and' with 1 is inserted on the condition when lowering llvm.amdgcn.if, here with the condition truncated from a loaded value
; Variant of the truncated-condition test where the i32 source value is
; loaded from %ptr instead of being passed as an argument. The expected code
; waits for the load (s_waitcnt vmcnt(0)), masks the result with v_and_b32
; (and with 1), then compares against zero and saves exec.
90define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) {
91; CHECK-LABEL: divergent_if_nonboolean_condition1:
92; CHECK:       ; %bb.0: ; %entry
93; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; CHECK-NEXT:    global_load_dword v0, v[0:1], off
95; CHECK-NEXT:    s_waitcnt vmcnt(0)
96; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
97; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
98; CHECK-NEXT:    ; implicit-def: $vgpr0
99; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
100; CHECK-NEXT:    s_cbranch_execz BB3_2
101; CHECK-NEXT:  ; %bb.1: ; %if.true
102; CHECK-NEXT:    global_load_dword v0, v[0:1], off
103; CHECK-NEXT:  BB3_2: ; %endif
104; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
105; CHECK-NEXT:    s_waitcnt vmcnt(0)
106; CHECK-NEXT:    s_setpc_b64 s[30:31]
107entry:
108  %value = load i32, i32 addrspace(1)* %ptr
; trunc keeps only the low bit of the loaded value.
109  %c = trunc i32 %value to i1
110  br i1 %c, label %if.true, label %endif
111
112if.true:
113  %val = load volatile i32, i32 addrspace(1)* undef
114  br label %endif
115
116endif:
; The %entry edge contributes undef, so only the loaded value is meaningful.
117  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
118  ret i32 %v
119}
120
; External addrspace(4) constants read by @constrained_if_register_class:
; an i32 used for the first branch condition and a float pointer that is
; dereferenced for the second one.
121@external_constant = external addrspace(4) constant i32, align 4
122@const.ptr = external addrspace(4) constant float*, align 4
123
124; Make sure this case compiles. G_ICMP was mis-mapped due to having
125; the result register class constrained by llvm.amdgcn.if lowering.
; Regression test whose branch conditions come from values loaded out of
; addrspace(4) globals. The expected code selects scalar compares and
; s_cbranch_scc0/scc1/vccnz branches for all three conditional branches; no
; exec mask save/restore appears in the output.
126define void @constrained_if_register_class() {
127; CHECK-LABEL: constrained_if_register_class:
128; CHECK:       ; %bb.0: ; %bb
129; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; CHECK-NEXT:    s_getpc_b64 s[4:5]
131; CHECK-NEXT:    s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
132; CHECK-NEXT:    s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
133; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
134; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
135; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
136; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
137; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
138; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
139; CHECK-NEXT:    s_and_b32 s4, s4, 1
140; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
141; CHECK-NEXT:    s_cbranch_scc1 BB4_4
142; CHECK-NEXT:  ; %bb.1: ; %bb2
143; CHECK-NEXT:    s_getpc_b64 s[6:7]
144; CHECK-NEXT:    s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
145; CHECK-NEXT:    s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+12
146; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
147; CHECK-NEXT:    v_mov_b32_e32 v0, 0
148; CHECK-NEXT:    s_mov_b32 s4, -1
149; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
150; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
151; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
152; CHECK-NEXT:    global_load_dword v0, v0, s[6:7]
153; CHECK-NEXT:    s_waitcnt vmcnt(0)
154; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
155; CHECK-NEXT:    s_cbranch_vccnz BB4_3
156; CHECK-NEXT:  ; %bb.2: ; %bb7
157; CHECK-NEXT:    s_mov_b32 s4, 0
158; CHECK-NEXT:  BB4_3: ; %bb8
159; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
160; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
161; CHECK-NEXT:    s_and_b32 s4, s4, 1
162; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
163; CHECK-NEXT:    s_cbranch_scc0 BB4_5
164; CHECK-NEXT:  BB4_4: ; %bb12
165; CHECK-NEXT:    s_setpc_b64 s[30:31]
166; CHECK-NEXT:  BB4_5: ; %bb11
167; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
168; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
169; CHECK-NEXT:    s_waitcnt vmcnt(0)
170; CHECK-NEXT:    s_setpc_b64 s[30:31]
171bb:
172  %tmp = load i32, i32 addrspace(4)* @external_constant
; %ptr is loaded here but only dereferenced in %bb2.
173  %ptr = load float*, float* addrspace(4)* @const.ptr
174  %tmp1 = icmp ne i32 %tmp, 0
175  br i1 %tmp1, label %bb12, label %bb2
176
177bb2:
178  %tmp4 = load float, float* %ptr, align 4
179  %tmp5 = fcmp olt float %tmp4, 1.0
; 'or' with false does not change the value of %tmp5.
; NOTE(review): presumably kept from the original reproducer - confirm.
180  %tmp6 = or i1 %tmp5, false
181  br i1 %tmp6, label %bb8, label %bb7
182
183bb7:
184  br label %bb8
185
186bb8:
; -1 when arriving directly from %bb2, 0 when arriving through %bb7.
187  %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
188  %tmp10 = icmp eq i32 %tmp9, 0
189  br i1 %tmp10, label %bb11, label %bb12
190
191bb11:
; Store to an undef addrspace(5) pointer; only reached when %tmp9 is 0.
192  store float 4.0, float addrspace(5)* undef, align 4
193  br label %bb12
194
195bb12:
196  ret void
197}
198
; Kernel containing a loop with two exits to %bb9: one from the header %bb1
; and one from the latch %bb4. The expected code ORs each iteration's break
; condition (s[2:3], masked with exec) into s[0:1], clears those lanes from
; exec with s_andn2_b64, and leaves the loop via s_cbranch_execz.
199define amdgpu_kernel void @break_loop(i32 %arg) {
200; CHECK-LABEL: break_loop:
201; CHECK:       ; %bb.0: ; %bb
202; CHECK-NEXT:    s_load_dword s2, s[4:5], 0x0
203; CHECK-NEXT:    s_mov_b64 s[0:1], 0
204; CHECK-NEXT:    ; implicit-def: $vgpr1
205; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
206; CHECK-NEXT:    v_subrev_u32_e32 v0, s2, v0
207; CHECK-NEXT:    s_branch BB5_2
208; CHECK-NEXT:  BB5_1: ; %Flow
209; CHECK-NEXT:    ; in Loop: Header=BB5_2 Depth=1
210; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
211; CHECK-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
212; CHECK-NEXT:    s_andn2_b64 exec, exec, s[0:1]
213; CHECK-NEXT:    s_cbranch_execz BB5_4
214; CHECK-NEXT:  BB5_2: ; %bb1
215; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
216; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
217; CHECK-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
218; CHECK-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, 1
219; CHECK-NEXT:    s_cbranch_vccnz BB5_1
220; CHECK-NEXT:  ; %bb.3: ; %bb4
221; CHECK-NEXT:    ; in Loop: Header=BB5_2 Depth=1
222; CHECK-NEXT:    global_load_dword v2, v[0:1], off
223; CHECK-NEXT:    s_waitcnt vmcnt(0)
224; CHECK-NEXT:    v_cmp_ge_i32_e64 s[2:3], v0, v2
225; CHECK-NEXT:    s_branch BB5_1
226; CHECK-NEXT:  BB5_4: ; %bb9
227; CHECK-NEXT:    s_endpgm
228bb:
229  %id = call i32 @llvm.amdgcn.workitem.id.x()
; Work-item id minus the kernel argument; compared against the loaded value
; in %bb4.
230  %tmp = sub i32 %id, %arg
231  br label %bb1
232
233bb1:
; The induction variable starts from undef on entry from %bb.
234  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
235  %lsr.iv.next = add i32 %lsr.iv, 1
; First exit: leave the loop once the incremented counter is non-negative.
236  %cmp0 = icmp slt i32 %lsr.iv.next, 0
237  br i1 %cmp0, label %bb4, label %bb9
238
239bb4:
240  %load = load volatile i32, i32 addrspace(1)* undef, align 4
; Second exit: continue looping only while %tmp is less than the loaded value.
241  %cmp1 = icmp slt i32 %tmp, %load
242  br i1 %cmp1, label %bb1, label %bb9
243
244bb9:
245  ret void
246}
247
; Intrinsic called by @break_loop to obtain the work-item id (x component).
248declare i32 @llvm.amdgcn.workitem.id.x()
249