1; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s
2
3; The inline spiller can decide to move a spill as early as possible in the basic block.
4; It will skip phis and labels, but we also need to make sure it skips instructions
5; in the basic block prologue which restore the exec mask.
6; Make sure the instruction to restore the exec mask immediately follows the label.
7
8; CHECK-LABEL: {{^}}spill_cfg_position:
9; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
10; CHECK: {{^}}[[LABEL1]]:
11; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
12; CHECK: {{^}}[[LABEL2]]:
13; CHECK-NEXT: s_or_b64 exec
14; CHECK: buffer_
15
; Test kernel for spill placement relative to the exec-mask restore.
; It loads nine consecutive i32 values from %arg (indices 0-8) plus one value
; indexed by the workitem id, then branches on whether that last value is zero.
; Both arms and the join block reuse the earlier loads, so those values stay
; live across the branches. NOTE(review): together with -stress-regalloc=6 on
; the RUN line this is presumably what forces the spills under test - confirm.
; Do not reorder instructions here: the statement order shapes the live ranges.
16define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
17bb:
; Workitem id in x; used below as the index of the per-workitem element.
18  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
; Loads of %arg[0] through %arg[8].
19  %tmp14 = load i32, i32 addrspace(1)* %arg, align 4
20  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
21  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
22  %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
23  %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
24  %tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
25  %tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
26  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
27  %tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
28  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
29  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
30  %tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
31  %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
32  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
33  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
34  %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
35  %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
; Per-workitem element %arg[workitem id]; %tmp33 is also the final store address.
36  %tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1
37  %tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
; Branch on the per-workitem value: zero goes to bb44, non-zero to bb36.
38  %tmp35 = icmp eq i32 %tmp34, 0
39  br i1 %tmp35, label %bb44, label %bb36
40
41bb36:                                             ; preds = %bb
; Non-zero arm: tmp20*tmp18 + tmp16 + tmp24*tmp22 + tmp28*tmp26 + tmp30.
42  %tmp37 = mul nsw i32 %tmp20, %tmp18
43  %tmp38 = add nsw i32 %tmp37, %tmp16
44  %tmp39 = mul nsw i32 %tmp24, %tmp22
45  %tmp40 = add nsw i32 %tmp38, %tmp39
46  %tmp41 = mul nsw i32 %tmp28, %tmp26
47  %tmp42 = add nsw i32 %tmp40, %tmp41
48  %tmp43 = add nsw i32 %tmp42, %tmp30
49  br label %bb52
50
51bb44:                                             ; preds = %bb
; Zero arm: tmp18*tmp16 + tmp22*tmp20 + tmp26*tmp24 + tmp30*tmp28.
52  %tmp45 = mul nsw i32 %tmp18, %tmp16
53  %tmp46 = mul nsw i32 %tmp22, %tmp20
54  %tmp47 = add nsw i32 %tmp46, %tmp45
55  %tmp48 = mul nsw i32 %tmp26, %tmp24
56  %tmp49 = add nsw i32 %tmp47, %tmp48
57  %tmp50 = mul nsw i32 %tmp30, %tmp28
58  %tmp51 = add nsw i32 %tmp49, %tmp50
59  br label %bb52
60
61bb52:                                             ; preds = %bb44, %bb36
; Join block: merge the arm result, then reuse the values loaded in the entry
; block once more. The check patterns at the top of the file expect s_or_b64 exec
; directly after the label that follows the second s_cbranch_execz, ahead of
; any buffer_ instruction.
62  %tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]
63  %tmp54 = mul nsw i32 %tmp16, %tmp14
64  %tmp55 = mul nsw i32 %tmp22, %tmp18
65  %tmp56 = mul nsw i32 %tmp24, %tmp20
66  %tmp57 = mul nsw i32 %tmp30, %tmp26
67  %tmp58 = add i32 %tmp55, %tmp54
68  %tmp59 = add i32 %tmp58, %tmp56
69  %tmp60 = add i32 %tmp59, %tmp28
70  %tmp61 = add i32 %tmp60, %tmp57
71  %tmp62 = add i32 %tmp61, %tmp53
; Write the combined result back to the per-workitem element.
72  store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4
73  ret void
74}
75
; Intrinsic called in the kernel's entry block to obtain the workitem id in x.
76declare i32 @llvm.amdgcn.workitem.id.x() #0
77
; Attribute group #0, referenced by the intrinsic declaration and by its call site.
78attributes #0 = { nounwind readnone }
79