; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s

; Inline spiller can decide to move a spill as early as possible in the basic block.
; It will skip phis and labels, but we also need to make sure it skips instructions
; in the basic block prologue which restore the exec mask.
; Make sure the instruction that restores the exec mask immediately follows the label.
;
; Output shape pinned by the FileCheck directives below: two s_cbranch_execz
; branches, each with its target label appearing later at the start of a line,
; and the line right after the second label must contain "s_or_b64 exec".
; Some buffer_ instruction is only required to appear after that point.
; NOTE(review): the buffer_ match is presumably the spill/reload traffic; the
; pattern itself does not say which buffer instruction it is -- confirm.

; CHECK-LABEL: {{^}}spill_cfg_position:
; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL1]]:
; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL2]]:
; CHECK-NEXT: s_or_b64 exec
; CHECK: buffer_

; Kernel that loads %arg[0] .. %arg[8] plus one element indexed by the result
; of llvm.amdgcn.workitem.id.x, branches on that element, and uses the loaded
; values on both sides of the branch and again after the join.
; NOTE(review): combined with the -stress-regalloc=6 option on the llc
; invocation above, this presumably forces spilling around the control flow;
; the IR alone does not show that -- confirm.
define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  ; Nine loads from %arg at constant element offsets 0 through 8.
  %tmp14 = load i32, i32 addrspace(1)* %arg, align 4
  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
  %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
  %tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
  %tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
  %tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
  %tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
  %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
  %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
  %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
  ; %tmp33 addresses %arg[%tmp1]; it is loaded here and stored to in bb52.
  %tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1
  %tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
  ; Branch to bb44 when the loaded element is zero, otherwise to bb36.
  %tmp35 = icmp eq i32 %tmp34, 0
  br i1 %tmp35, label %bb44, label %bb36

; Non-zero path: %tmp43 = %tmp20*%tmp18 + %tmp16 + %tmp24*%tmp22 + %tmp28*%tmp26 + %tmp30.
bb36:                                             ; preds = %bb
  %tmp37 = mul nsw i32 %tmp20, %tmp18
  %tmp38 = add nsw i32 %tmp37, %tmp16
  %tmp39 = mul nsw i32 %tmp24, %tmp22
  %tmp40 = add nsw i32 %tmp38, %tmp39
  %tmp41 = mul nsw i32 %tmp28, %tmp26
  %tmp42 = add nsw i32 %tmp40, %tmp41
  %tmp43 = add nsw i32 %tmp42, %tmp30
  br label %bb52

; Zero path: %tmp51 = %tmp18*%tmp16 + %tmp22*%tmp20 + %tmp26*%tmp24 + %tmp30*%tmp28.
bb44:                                             ; preds = %bb
  %tmp45 = mul nsw i32 %tmp18, %tmp16
  %tmp46 = mul nsw i32 %tmp22, %tmp20
  %tmp47 = add nsw i32 %tmp46, %tmp45
  %tmp48 = mul nsw i32 %tmp26, %tmp24
  %tmp49 = add nsw i32 %tmp47, %tmp48
  %tmp50 = mul nsw i32 %tmp30, %tmp28
  %tmp51 = add nsw i32 %tmp49, %tmp50
  br label %bb52

; Join block: merges the two branch results through the phi, combines them
; with further products of the values loaded in bb, and stores the sum back
; through %tmp33. The values loaded in bb are therefore still used here,
; after both branch arms.
bb52:                                             ; preds = %bb44, %bb36
  %tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]
  %tmp54 = mul nsw i32 %tmp16, %tmp14
  %tmp55 = mul nsw i32 %tmp22, %tmp18
  %tmp56 = mul nsw i32 %tmp24, %tmp20
  %tmp57 = mul nsw i32 %tmp30, %tmp26
  %tmp58 = add i32 %tmp55, %tmp54
  %tmp59 = add i32 %tmp58, %tmp56
  %tmp60 = add i32 %tmp59, %tmp28
  %tmp61 = add i32 %tmp60, %tmp57
  %tmp62 = add i32 %tmp61, %tmp53
  store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }