1; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
2
; Intrinsic whose i32 result the tests below use as an index and as the input
; to their branch conditions.
; NOTE(review): judging by its name this presumably returns the work-item id in
; the X dimension, which would make those branches differ per lane -- confirm.
3declare i32 @llvm.r600.read.tidig.x() nounwind readnone
4
5; SI-LABEL: @test_if
6; Make sure the i1 values created by the CFG structurizer pass are
7; moved using VALU instructions.
8; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
9; SI: v_mov_b32_e32 v{{[0-9]}}, -1
; Three-way switch on %a whose default arm contains a further if/else. Every
; arm stores a distinct constant (0, 1, 2 or 3) to %dst[%b] and then all paths
; rejoin at %end. %src is accepted as a parameter but never referenced.
10define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
11entry:
12  switch i32 %a, label %default [
13    i32 0, label %case0
14    i32 1, label %case1
15  ]
16
; %a == 0: store 0 to %dst[%b].
17case0:
18  %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
19  store i32 0, i32 addrspace(1)* %arrayidx1, align 4
20  br label %end
21
; %a == 1: store 1 to %dst[%b].
22case1:
23  %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
24  store i32 1, i32 addrspace(1)* %arrayidx5, align 4
25  br label %end
26
; %a is neither 0 nor 1: the address is computed once here and shared by the
; two arms below, which are selected by whether %a equals 2.
27default:
28  %cmp8 = icmp eq i32 %a, 2
29  %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
30  br i1 %cmp8, label %if, label %else
31
; %a == 2: store 2.
32if:
33  store i32 2, i32 addrspace(1)* %arrayidx10, align 4
34  br label %end
35
; Any other value of %a: store 3.
36else:
37  store i32 3, i32 addrspace(1)* %arrayidx10, align 4
38  br label %end
39
; Common join point for all four arms.
40end:
41  ret void
42}
43
44; SI-LABEL: @simple_test_v_if
45; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
46; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
47; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
48
49; SI: ; BB#1
50; SI: buffer_store_dword
51; SI: s_endpgm
52
53; SI: BB1_2:
54; SI: s_or_b64 exec, exec, [[BR_SREG]]
55; SI: s_endpgm
; Single conditional store: when the intrinsic result %tid is non-zero, 999 is
; written to %dst[%tid]; otherwise the function returns without storing.
; %src is accepted as a parameter but never referenced.
; NOTE(review): the check for block 1 preceding this function omits the
; trailing colon that the equivalent checks for the later functions include --
; confirm whether the looser pattern is intentional.
56define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
57  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
58  %is.0 = icmp ne i32 %tid, 0
59  br i1 %is.0, label %store, label %exit
60
; Taken only when %tid != 0; returns directly instead of branching to %exit.
61store:
62  %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
63  store i32 999, i32 addrspace(1)* %gep
64  ret void
65
66exit:
67  ret void
68}
69
70; SI-LABEL: @simple_test_v_loop
71; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
72; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
73; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
74; SI: s_cbranch_execz BB2_2
75
76; SI: ; BB#1:
77; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
78
79; SI: BB2_3:
80; SI: buffer_load_dword
81; SI: buffer_store_dword
82; SI: v_cmp_eq_i32_e32 vcc,
83; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
84; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
85; SI: s_cbranch_execnz BB2_3
86
; Loop guarded by a condition on the intrinsic result: when %tid is non-zero,
; %i counts from %tid upward and each iteration stores a loaded value to
; %dst[%i]; the loop exits once the incremented counter equals %tid + 64.
; When %tid is zero the loop is skipped entirely.
87define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
88entry:
89  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
90  %is.0 = icmp ne i32 %tid, 0
; Loop bound, computed before the guard so it is available on loop entry.
91  %limit = add i32 %tid, 64
92  br i1 %is.0, label %loop, label %exit
93
94loop:
; %i starts at %tid on entry and takes %i.inc on every back edge.
95  %i = phi i32 [%tid, %entry], [%i.inc, %loop]
; NOTE(review): %gep.src is computed but has no users; the load below reads
; %src itself rather than %src[%i]. Confirm whether that is intended before
; changing it, since the checks above were written against this IR.
96  %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
97  %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
98  %load = load i32, i32 addrspace(1)* %src
99  store i32 %load, i32 addrspace(1)* %gep.dst
100  %i.inc = add nsw i32 %i, 1
; Exit when the next index reaches the bound; otherwise take the back edge.
101  %cmp = icmp eq i32 %limit, %i.inc
102  br i1 %cmp, label %exit, label %loop
103
104exit:
105  ret void
106}
107
108; SI-LABEL: @multi_vcond_loop
109
110; Load the loop limit from the buffer.
111; Branch straight to the exit block if no work-item enters the loop.
112; SI: ; BB#0:
113; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
114; SI: v_cmp_lt_i32_e32 vcc
115; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
116; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
117; SI: s_cbranch_execz BB3_2
118
119; Initialize inner condition to false
120; SI: ; BB#1:
121; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
122; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
123
124; Clear exec bits for workitems that load -1s
125; SI: BB3_3:
126; SI: buffer_load_dword [[B:v[0-9]+]]
127; SI: buffer_load_dword [[A:v[0-9]+]]
128; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
129; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
130; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
131; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
132; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
133; SI: s_cbranch_execz BB3_5
134
135; SI: BB#4:
136; SI: buffer_store_dword
137; SI: v_cmp_ge_i64_e32 vcc
138; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
139
140; SI: BB3_5:
141; SI: s_or_b64 exec, exec, [[ORNEG1]]
142; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
143; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
144; SI: s_cbranch_execnz BB3_3
145
146; SI: BB#6
147; SI: s_or_b64 exec, exec, [[COND_STATE]]
148
149; SI: BB3_2:
150; SI-NOT: [[COND_STATE]]
151; SI: s_endpgm
152
; Loop with two separate exit conditions. A signed trip count is loaded from
; %arg3 at the index given by the intrinsic result; if it is positive the loop
; is entered. Each iteration loads one element from %arg1 and one from %arg2 at
; index (counter + intrinsic result) and leaves the loop if either value is -1.
; Otherwise their sum is stored to %arg at the same index and the loop repeats
; while the incremented counter is still below the trip count. All three exits
; lead to %bb26.
153define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
154bb:
155  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
156  %tmp4 = sext i32 %tmp to i64
157  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
; %tmp6 is the trip count; the loop is skipped unless it is > 0.
158  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
159  %tmp7 = icmp sgt i32 %tmp6, 0
; Widened copy of the trip count for the 64-bit comparison in %bb20.
160  %tmp8 = sext i32 %tmp6 to i64
161  br i1 %tmp7, label %bb10, label %bb26
162
163bb10:                                             ; preds = %bb, %bb20
; %tmp11 is the 64-bit loop counter: 0 on entry, %tmp23 on the back edge.
164  %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
; %tmp12 is the element index shared by both loads and the store.
165  %tmp12 = add nsw i64 %tmp11, %tmp4
166  %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
167  %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
168  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
169  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
; First exit condition: continue only if neither loaded value is -1.
170  %tmp17 = icmp ne i32 %tmp14, -1
171  %tmp18 = icmp ne i32 %tmp16, -1
172  %tmp19 = and i1 %tmp17, %tmp18
173  br i1 %tmp19, label %bb20, label %bb26
174
175bb20:                                             ; preds = %bb10
176  %tmp21 = add nsw i32 %tmp16, %tmp14
177  %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
178  store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
; Second exit condition: repeat while the incremented counter < trip count.
179  %tmp23 = add nuw nsw i64 %tmp11, 1
180  %tmp24 = icmp slt i64 %tmp23, %tmp8
181  br i1 %tmp24, label %bb10, label %bb26
182
183bb26:                                             ; preds = %bb10, %bb20, %bb
184  ret void
185}
186
; Attribute groups referenced above: #0 is attached to the intrinsic call in
; @multi_vcond_loop, and #1 to each of the function definitions.
187attributes #0 = { nounwind readnone }
188attributes #1 = { nounwind }
189