1; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
2; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
3
4; SI-LABEL: {{^}}uniform_if_scc:
5; SI-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
6; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
7; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
8
9; Fall-through to the else
10; SI: v_mov_b32_e32 [[STORE_VAL]], 1
11
12; SI: [[IF_LABEL]]:
13; SI: buffer_store_dword [[STORE_VAL]]
; Uniform (scalar) branch on an SGPR integer compare; the expected output
; uses s_cmp/s_cbranch_scc1 rather than a divergent exec-mask sequence.
; Both arms feed a phi selecting the value stored to %out.
define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:                                               ; preds = %entry
  br label %done

else:                                             ; preds = %entry
  br label %done

done:                                             ; preds = %else, %if
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
30
31; SI-LABEL: {{^}}uniform_if_vcc:
32; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
33; also scheduled the write first.
34; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
35; SI-DAG: s_and_b64 vcc, exec, [[COND]]
36; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
37; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
38
39; Fall-through to the else
40; SI: v_mov_b32_e32 [[STORE_VAL]], 1
41
42; SI: [[IF_LABEL]]:
43; SI: buffer_store_dword [[STORE_VAL]]
; Same shape as uniform_if_scc but the condition is a float compare, so
; the checked output carries it in VCC (v_cmp + s_and_b64 with exec and
; s_cbranch_vccnz) instead of SCC.
define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:                                               ; preds = %entry
  br label %done

else:                                             ; preds = %entry
  br label %done

done:                                             ; preds = %else, %if
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
60
61; SI-LABEL: {{^}}uniform_if_swap_br_targets_scc:
62; SI-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
63; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
64; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
65
66; Fall-through to the else
67; SI: v_mov_b32_e32 [[STORE_VAL]], 1
68
69; SI: [[IF_LABEL]]:
70; SI: buffer_store_dword [[STORE_VAL]]
; Like uniform_if_scc, but the branch targets are swapped (icmp eq taken
; to %else); the checked output inverts the compare to s_cmp_lg instead.
define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  ; Targets deliberately swapped: true edge goes to %else.
  br i1 %cmp0, label %else, label %if

if:                                               ; preds = %entry
  br label %done

else:                                              ; preds = %entry
  br label %done

done:                                             ; preds = %else, %if
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
87
88; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
89; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
90; also scheduled the write first.
91; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
92; SI-DAG: s_and_b64 vcc, exec, [[COND]]
93; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
94; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
95
96; Fall-through to the else
97; SI: v_mov_b32_e32 [[STORE_VAL]], 1
98
99; SI: [[IF_LABEL]]:
100; SI: buffer_store_dword [[STORE_VAL]]
; Like uniform_if_vcc, but with branch targets swapped; the checked
; output inverts the float compare to v_cmp_neq_f32.
define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  ; Targets deliberately swapped: true edge goes to %else.
  br i1 %cmp0, label %else, label %if

if:                                               ; preds = %entry
  br label %done

else:                                             ; preds = %entry
  br label %done

done:                                             ; preds = %else, %if
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}
117
118; SI-LABEL: {{^}}uniform_if_move_valu:
119; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
120; Using a floating-point value in an integer compare will cause the compare to
121; be selected for the SALU and then later moved to the VALU.
122; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
123; SI: s_and_b64 vcc, exec, [[COND]]
124; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
125; SI: buffer_store_dword
126; SI: [[ENDIF_LABEL]]:
127; SI: s_endpgm
; The fadd keeps the value in a VGPR, so the integer compare (initially
; selected for the SALU) must later be moved to the VALU; the checked
; output is v_cmp_ne_i32 feeding s_cbranch_vccnz.
define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  ; Reinterpret the VALU float result as an integer to feed the icmp.
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:                                               ; preds = %entry
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %if, %entry
  ret void
}
142
143; SI-LABEL: {{^}}uniform_if_move_valu_commute:
144; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
145; Using a floating-point value in an integer compare will cause the compare to
146; be selected for the SALU and then later moved to the VALU.
147; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
148; SI: s_and_b64 vcc, exec, [[COND]]
149; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
150; SI: buffer_store_dword
151; SI: [[ENDIF_LABEL]]:
152; SI: s_endpgm
; Same as uniform_if_move_valu but with an ordering (ugt) compare, so
; moving it to the VALU commutes the operands: `ugt 5` becomes
; v_cmp_gt_u32 with immediate 6 in the checked output.
define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  ; Reinterpret the VALU float result as an integer to feed the icmp.
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:                                               ; preds = %entry
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %if, %entry
  ret void
}
167
168
169; SI-LABEL: {{^}}uniform_if_else_ret:
170; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
171; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
172
173; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
174; SI: buffer_store_dword [[TWO]]
175; SI: s_endpgm
176
177; SI: {{^}}[[IF_LABEL]]:
178; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
179; SI: buffer_store_dword [[ONE]]
180; SI: s_endpgm
; Uniform if/else where both arms fall through to a return: each arm is
; expected to end in its own s_endpgm (no join-block branch needed).
define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}
197
198; SI-LABEL: {{^}}uniform_if_else:
199; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
200; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
201
202; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
203; SI: buffer_store_dword [[TWO]]
204; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
205
206; SI: [[IF_LABEL]]:
207; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
208; SI: buffer_store_dword [[ONE]]
209
210; SI: [[ENDIF_LABEL]]:
211; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
212; SI: buffer_store_dword [[THREE]]
213; SI: s_endpgm
; Uniform if/else with work after the join: the else arm needs an
; explicit s_branch over the then arm to reach the shared tail store.
define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ; Post-join work shared by both arms.
  store i32 3, i32 addrspace(1)* %out1
  ret void
}
231
232; SI-LABEL: {{^}}icmp_2_users:
233; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
234; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
235; SI: buffer_store_dword
236; SI: [[LABEL]]:
237; SI: s_endpgm
; The compare result %0 has two users (the branch and the sext), so the
; uniform branch must still come from a single s_cmp/s_cbranch_scc pair.
define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  ; Second use of %0 as a stored value (-1/0 after sign extension).
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}
251
252; SI-LABEL: {{^}}icmp_users_different_blocks:
253; SI: s_load_dword [[COND:s[0-9]+]]
254; SI: s_cmp_lt_i32 [[COND]], 1
255; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
256; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
257; SI: s_and_b64 vcc, exec, [[MASK]]
258; SI: s_cbranch_vccnz [[EXIT]]
259; SI: buffer_store
260; SI: {{^}}[[EXIT]]:
261; SI: s_endpgm
; Two uniform compares whose users live in different blocks: the first
; stays an SALU s_cmp; the second is live across blocks (and mixed with
; the divergent workitem id), so the expected output materializes it
; with v_cmp into an SGPR mask.
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  ; %cmp1 used here both as data (sext) and as the branch condition.
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}
281
282; SI-LABEL: {{^}}uniform_loop:
283; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
284; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
285;        get s_add_i32 here.
286; SI: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
287; SI: v_cmp_ne_i32_e32 vcc, 0, [[I]]
288; SI: s_and_b64 vcc, exec, vcc
289; SI: s_cbranch_vccnz [[LOOP_LABEL]]
290; SI: s_endpgm
; Uniform counted loop: the trip count %a is an SGPR, so the backedge
; should be a scalar s_cbranch (the FIXME above notes the add is still
; selected as v_add_i32 rather than s_add_i32).
define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  ; Exit once the counter reaches the uniform bound %a.
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:                                             ; preds = %loop
  ret void
}
304
305; Test uniform and divergent.
306
307; SI-LABEL: {{^}}uniform_inside_divergent:
308; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
309; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
310; SI: s_xor_b64  [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
311; SI: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
312; SI: s_cmp_lg_i32 {{s[0-9]+}}, 0
313; SI: s_cbranch_scc1 [[ENDIF_LABEL]]
314; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
315; SI: buffer_store_dword [[ONE]]
; Uniform branch nested inside a divergent one: the outer branch needs
; the exec-mask save/xor sequence, while the inner one can remain a
; scalar s_cmp/s_cbranch.
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  ; Divergent condition: depends on the per-lane workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:                                               ; preds = %entry
  store i32 0, i32 addrspace(1)* %out
  ; Uniform condition: depends only on the SGPR argument %cond.
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:                                       ; preds = %if
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %if_uniform, %if, %entry
  ret void
}
334
335; SI-LABEL: {{^}}divergent_inside_uniform:
336; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
337; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
338; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
339; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
340; SI: s_xor_b64  [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
341; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
342; SI: buffer_store_dword [[ONE]]
343; SI: [[ENDIF_LABEL]]:
344; SI: s_endpgm
; Mirror of uniform_inside_divergent: a divergent branch nested inside a
; uniform one. The outer branch stays scalar; the inner one needs the
; exec-mask save/xor sequence.
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  ; Uniform condition: depends only on the SGPR argument %cond.
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:                                               ; preds = %entry
  store i32 0, i32 addrspace(1)* %out
  ; Divergent condition: depends on the per-lane workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:                                       ; preds = %if
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %if_uniform, %if, %entry
  ret void
}
363
364; SI-LABEL: {{^}}divergent_if_uniform_if:
365; SI: v_cmp_eq_i32_e32 vcc, 0, v0
366; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
367; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
368; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
369; SI: buffer_store_dword [[ONE]]
370; SI: s_or_b64 exec, exec, [[MASK]]
371; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
372; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
373; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
374; SI: buffer_store_dword [[TWO]]
375; SI: [[EXIT]]:
376; SI: s_endpgm
; A divergent if followed (not nested) by a uniform if: exec must be
; restored (s_or_b64) after the divergent region before the scalar
; s_cmp/s_cbranch of the uniform branch.
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  ; Divergent condition: per-lane workitem id compared against 0.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:                                               ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %if, %entry
  ; Uniform condition: depends only on the SGPR argument %cond.
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:                                       ; preds = %endif
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:                                             ; preds = %if_uniform, %endif
  ret void
}
398
; The conditions of the branches in the two blocks are uniform.
; MachineCSE replaces the second condition with the inverse of the
; first, leaving an SCC use in a different block from the one in which
; it was defined.
403
404; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
405; SI: s_load_dword [[COND:s[0-9]+]]
406; SI: s_cmp_lt_i32 [[COND]], 1
407; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
408
409; SI: BB#1:
410; SI-NOT: cmp
411; SI: buffer_load_dword
412; SI: buffer_store_dword
413; SI: s_cbranch_scc1 BB[[FNNUM]]_3
414
415; SI: BB[[FNNUM]]_3:
416; SI: s_endpgm
; %tmp1 (sgt 0) and %tmp9 (sle 0) are logical inverses, so MachineCSE
; folds the second compare into the first, leaving an SCC use in bb2 for
; a compare defined in bb; the checks verify no second cmp is emitted.
define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  ; NOTE(review): %tmp is never used; presumably kept from the original
  ; test-case reduction.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  ; Inverse of %tmp1; CSE'd rather than recomputed.
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}
436
437declare i32 @llvm.amdgcn.workitem.id.x() #0
438
439attributes #0 = { readnone }
440