; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ret void
}

; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  ret void
}

; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  ret void
}

; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  ret void
}

; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; Make sure scc liveness is updated if s_or_b64 is removed
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }