; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-remove-redundant-endcf < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: ds_write_b32
; GCN: s_endpgm

define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  store i32 3, i32 addrspace(3)* null
  ret void
}

; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
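; (Explanatory note: the s_or_saveexec/s_xor_b64 pair above re-enables the
; else lanes while giving up the then lanes; because both inner arms rejoin
; at %bb.outer.end, no separate inner endcf restore is expected here, and an
; all-zero mask may branch straight to the outer endif, as checked below.)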
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  store i32 3, i32 addrspace(3)* null
  ret void
}

; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: [[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_ELSE]]
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
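  ; This inner then-block, nested in the outer else, stores to a fourth
  ; distinct offset from %tmp1, so each of the four arms keeps its own
  ; side effect and its own block in the final CFG.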
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  store i32 3, i32 addrspace(3)* null
  ret void
}

; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
;
; GCN: [[BB1_INNER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offen

; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER]], {{vcc|s\[[0-9:]+\]}}
; GCN-NEXT: s_cbranch_execz [[BB1_OUTER_LOOP]]

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }