; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope %s

; Although it's modeled without any control flow in order to get better code
; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
; it with "false". In case it's called in a provably infinite loop, we still
; need to successfully exit and export something, even if we can't know where
; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
; this case right before the s_endpgm to avoid GPU hangs, which is what this
; test checks.
;
; Every pixel shader below has the same shape: a loop block that calls
; @llvm.amdgcn.kill(i1 false) and branches back to itself, optionally guarded
; by a compare that sends some inputs to a normal %end block instead.

; CHECK-LABEL: return_void
; Make sure that we remove the done bit from the original export
; CHECK: exp mrt0 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void(float %0) #0 {
main_body:
  ; Inputs that compare ordered-less-than 10.0 export and return normally;
  ; all other inputs enter the infinite kill loop.
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; Both trailing i1 flags are set on this export; the checks above expect the
  ; emitted instruction to keep "vm" but no longer carry "done".
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #0
  ret void
}

; Check that we also remove the done bit from compressed exports correctly.
; CHECK-LABEL: return_void_compr
; CHECK: exp mrt0 v{{[0-9]+}}, off, v{{[0-9]+}}, off compr vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void_compr(float %0) #0 {
main_body:
  ; Same guard as @return_void: only the %end path reaches the export.
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; Compressed variant of the export, again with both trailing i1 flags set.
  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #0
  ret void
}

; Test the case where there's only a kill in an infinite loop, i.e. the
; function has no return block at all.
; CHECK-LABEL: only_kill
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; SIInsertSkips inserts an extra null export here, but it should be harmless.
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @only_kill() #0 {
main_body:
  br label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop
}

; Check that the epilog is the final block
; CHECK-LABEL: return_nonvoid
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
define amdgpu_ps float @return_nonvoid(float %0) #0 {
main_body:
  ; Same guard as above, but the normal path returns a value instead of
  ; issuing an explicit export.
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ret float 0.
}

declare void @llvm.amdgcn.kill(i1) #0
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0

; The only attribute group in this file; definitions, declarations and call
; sites all reference it.
attributes #0 = { nounwind }