1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
2; Although it's modeled without any control flow in order to get better code
3; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
4; it with "true". In case it's called in a provably infinite loop, we still
5; need to successfully exit and export something, even if we can't know where
6; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
7; this case right before the s_endpgm to avoid GPU hangs, which is what this
8; tests.
9
10; CHECK-LABEL: return_void
11; Make sure that we remove the done bit from the original export
12; CHECK: exp mrt0 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} vm
13; CHECK: exp null off, off, off, off done vm
14; CHECK-NEXT: s_endpgm
15define amdgpu_ps void @return_void(float %0) #0 {
16main_body:
17  %cmp = fcmp olt float %0, 1.000000e+01
18  br i1 %cmp, label %end, label %loop
19
20loop:
21  call void @llvm.amdgcn.kill(i1 false) #3
22  br label %loop
23
24end:
25  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #3
26  ret void
27}
28
29; Check that we also remove the done bit from compressed exports correctly.
30; CHECK-LABEL: return_void_compr
31; CHECK: exp mrt0 v{{[0-9]+}}, off, v{{[0-9]+}}, off compr vm
32; CHECK: exp null off, off, off, off done vm
33; CHECK-NEXT: s_endpgm
34define amdgpu_ps void @return_void_compr(float %0) #0 {
35main_body:
36  %cmp = fcmp olt float %0, 1.000000e+01
37  br i1 %cmp, label %end, label %loop
38
39loop:
40  call void @llvm.amdgcn.kill(i1 false) #3
41  br label %loop
42
43end:
44  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #3
45  ret void
46}
47
48; test the case where there's only a kill in an infinite loop
49; CHECK-LABEL: only_kill
50; CHECK: exp null off, off, off, off done vm
51; CHECK-NEXT: s_endpgm
52; SIInsertSkips inserts an extra null export here, but it should be harmless.
53; CHECK: exp null off, off, off, off done vm
54; CHECK-NEXT: s_endpgm
55define amdgpu_ps void @only_kill() #0 {
56main_body:
57  br label %loop
58
59loop:
60  call void @llvm.amdgcn.kill(i1 false) #3
61  br label %loop
62}
63
64; Check that the epilog is the final block
65; CHECK-LABEL: return_nonvoid
66; CHECK: exp null off, off, off, off done vm
67; CHECK-NEXT: s_endpgm
68; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
69define amdgpu_ps float @return_nonvoid(float %0) #0 {
70main_body:
71  %cmp = fcmp olt float %0, 1.000000e+01
72  br i1 %cmp, label %end, label %loop
73
74loop:
75  call void @llvm.amdgcn.kill(i1 false) #3
76  br label %loop
77
78end:
79  ret float 0.
80}
81
82declare void @llvm.amdgcn.kill(i1) #0
83declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
84declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0
85
86attributes #0 = { nounwind }
87