1; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
2
; Intrinsic called by each test function visible in this file; its i32 result
; is what the tests compare and branch on.
3declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
4
5; SI-LABEL: @test_if
6; Make sure the i1 values created by the cfg structurizer pass are
7; moved using VALU instructions
8; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
9; SI: v_mov_b32_e32 v{{[0-9]}}, -1
; Three-way switch on the workitem id with a nested compare-and-branch in the
; default arm. Each of the four paths stores a distinct constant (0, 1, 2 or 3)
; to %dst[%b] and then joins at %end. %src is never read in this function.
10define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
11entry:
12  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; Ids 0 and 1 get dedicated arms; every other id goes to %default.
13  switch i32 %tid, label %default [
14    i32 0, label %case0
15    i32 1, label %case1
16  ]
17
; id == 0: store 0 to %dst[%b].
18case0:
19  %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
20  store i32 0, i32 addrspace(1)* %arrayidx1, align 4
21  br label %end
22
; id == 1: store 1 to %dst[%b].
23case1:
24  %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
25  store i32 1, i32 addrspace(1)* %arrayidx5, align 4
26  br label %end
27
; Any other id: branch on whether the id equals 2. The destination address is
; computed once here and shared by both successors (%if and %else).
28default:
29  %cmp8 = icmp eq i32 %tid, 2
30  %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
31  br i1 %cmp8, label %if, label %else
32
; id == 2: store 2.
33if:
34  store i32 2, i32 addrspace(1)* %arrayidx10, align 4
35  br label %end
36
; id is none of 0, 1, 2: store 3.
37else:
38  store i32 3, i32 addrspace(1)* %arrayidx10, align 4
39  br label %end
40
; Common join point for all four paths.
41end:
42  ret void
43}
44
45; SI-LABEL: @simple_test_v_if
46; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
47; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
48; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
49
50; SI: ; BB#1
51; SI: buffer_store_dword
52; SI: s_endpgm
53
54; SI: BB1_2:
55; SI: s_or_b64 exec, exec, [[BR_SREG]]
56; SI: s_endpgm
; Single conditional store: when the workitem id is non-zero, write 999 to
; %dst[%tid]; otherwise do nothing. Both paths return directly, with no join
; block. %src is never read in this function.
57define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
58  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
59  %is.0 = icmp ne i32 %tid, 0
60  br i1 %is.0, label %store, label %exit
61
; Taken only when the id is non-zero.
62store:
63  %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
64  store i32 999, i32 addrspace(1)* %gep
65  ret void
66
67exit:
68  ret void
69}
70
71; SI-LABEL: @simple_test_v_loop
72; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
73; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
74; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
75; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
76
77; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
78
79; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
80; SI: buffer_load_dword
81; SI-DAG: buffer_store_dword
82; SI-DAG: v_cmp_eq_i32_e32 vcc,
83; SI-DAG: s_and_b64 vcc, exec, vcc
84; SI: s_cbranch_vccz [[LABEL_LOOP]]
85; SI: [[LABEL_EXIT]]:
86; SI: s_endpgm
87
; Conditionally entered counted loop: when the workitem id is non-zero, %i
; starts at %tid and is incremented by 1 until %i.inc equals %limit
; (%tid + 64). Each iteration loads an i32 and stores it to %dst[%i].
; When the id is zero the loop is skipped entirely.
88define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
89entry:
90  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
91  %is.0 = icmp ne i32 %tid, 0
; Loop bound, computed once before the loop is entered.
92  %limit = add i32 %tid, 64
93  br i1 %is.0, label %loop, label %exit
94
95loop:
; Induction variable: %tid on entry from %entry, %i.inc on the back edge.
96  %i = phi i32 [%tid, %entry], [%i.inc, %loop]
; NOTE(review): %gep.src is computed but has no users; the load below reads
; %src itself on every iteration rather than %src[%i]. Possibly %gep.src was
; intended - confirm before changing, since the CHECK lines above are tied to
; the code generated for this exact IR.
97  %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
98  %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
99  %load = load i32, i32 addrspace(1)* %src
100  store i32 %load, i32 addrspace(1)* %gep.dst
101  %i.inc = add nsw i32 %i, 1
; Exit once the incremented counter reaches the bound; otherwise loop again.
102  %cmp = icmp eq i32 %limit, %i.inc
103  br i1 %cmp, label %exit, label %loop
104
105exit:
106  ret void
107}
108
109; SI-LABEL: @multi_vcond_loop
110
111; Load loop limit from buffer
112; Branch to exit if uniformly not taken
113; SI: ; BB#0:
114; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
115; SI: v_cmp_lt_i32_e32 vcc
116; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
117; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
118; SI: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
119
120; Initialize inner condition to false
121; SI: ; BB#1:
122; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
123; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
124
125; Clear exec bits for workitems that load -1s
126; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
127; SI: buffer_load_dword [[B:v[0-9]+]]
128; SI: buffer_load_dword [[A:v[0-9]+]]
129; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
130; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
131; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
132; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
133; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
134; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
135
136; SI: BB#3:
137; SI: buffer_store_dword
138; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
139; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
140
141; SI: [[LABEL_FLOW]]:
142; SI: s_or_b64 exec, exec, [[ORNEG2]]
143; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[TMP]]
144; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
145; SI: s_cbranch_execnz [[LABEL_LOOP]]
146
147; SI: BB#5
148; SI: s_or_b64 exec, exec, [[COND_STATE]]
149
150; SI: [[LABEL_EXIT]]:
151; SI-NOT: [[COND_STATE]]
152; SI: s_endpgm
153
; Loop with two distinct exit conditions, guarded by an entry test.
;   %bb   : load a bound from %arg3[tid]; skip the loop unless it is > 0.
;   %bb10 : load %arg1[k + tid] and %arg2[k + tid]; leave the loop if either
;           value is -1.
;   %bb20 : store the sum of the two values to %arg[k + tid], increment k, and
;           continue while k < bound.
; Here k is %tmp11 (starts at 0) and tid is the sign-extended workitem id.
154define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
155bb:
156  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
157  %tmp4 = sext i32 %tmp to i64
; Loop bound is read from %arg3 at the workitem's own index.
158  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
159  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
160  %tmp7 = icmp sgt i32 %tmp6, 0
; Widened once here so the i64 loop counter can be compared against it in %bb20.
161  %tmp8 = sext i32 %tmp6 to i64
162  br i1 %tmp7, label %bb10, label %bb26
163
164bb10:                                             ; preds = %bb, %bb20
; Loop counter: 0 on entry from %bb, %tmp23 on the back edge from %bb20.
165  %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
; Element index shared by both loads and by the store in %bb20.
166  %tmp12 = add nsw i64 %tmp11, %tmp4
167  %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
168  %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
169  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
170  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
; First exit condition: proceed to %bb20 only if neither loaded value is -1.
171  %tmp17 = icmp ne i32 %tmp14, -1
172  %tmp18 = icmp ne i32 %tmp16, -1
173  %tmp19 = and i1 %tmp17, %tmp18
174  br i1 %tmp19, label %bb20, label %bb26
175
176bb20:                                             ; preds = %bb10
177  %tmp21 = add nsw i32 %tmp16, %tmp14
178  %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
179  store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
; Second exit condition: continue only while the incremented counter is below
; the bound loaded in %bb.
180  %tmp23 = add nuw nsw i64 %tmp11, 1
181  %tmp24 = icmp slt i64 %tmp23, %tmp8
182  br i1 %tmp24, label %bb10, label %bb26
183
; Single exit block reached from the entry guard and from both loop exits.
184bb26:                                             ; preds = %bb10, %bb20, %bb
185  ret void
186}
187
; Attribute groups referenced above: #0 is applied to the intrinsic call in
; @multi_vcond_loop, and #1 to each function definition visible in this file.
188attributes #0 = { nounwind readnone }
189attributes #1 = { nounwind }
190