1; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
2target triple = "nvptx64-nvidia-cuda"
3
; Ordinary external function; call_foo calls it in the same position where
; call_syncthreads calls the barrier intrinsic.
declare void @foo()
; NVVM barrier intrinsic; the checks on call_syncthreads expect the call to it
; to show up as a single bar.sync in the generated output.
declare void @llvm.nvvm.barrier0()
6
7; syncthreads shouldn't be duplicated.
8; CHECK: .func call_syncthreads
9; CHECK: bar.sync
10; CHECK-NOT: bar.sync
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
; Shape under test: a two-way branch whose arms each store a different
; constant through %a and then meet in one block that holds the barrier call.
; %b is not used by the body.
entry:
  br i1 %cond, label %StoreZero, label %StoreOne

; NOTE(review): nothing in this function branches to this block, so it looks
; unreachable; it is also the only user of %cond2. Confirm it is intentional.
orphan:
  br i1 %cond2, label %Exit, label %StoreZero

Exit:
  ret void

; %cond true: write 0, then go to the shared barrier block.
StoreZero:
  store i32 0, i32* %a
  br label %Barrier

; %cond false: write 1, then go to the shared barrier block.
StoreOne:
  store i32 1, i32* %a
  br label %Barrier

; Shared tail reached from both store blocks. The test expects exactly one
; bar.sync in the output, i.e. this block must not be copied into its
; predecessors.
Barrier:
  call void @llvm.nvvm.barrier0()
  br label %Exit
}
26
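; Check that the same control flow really does trigger tail duplication when
; the shared block calls an ordinary function (@foo) instead of the barrier:
; call_foo is expected to end up with two calls.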
28; CHECK: .func call_foo
29; CHECK: call
30; CHECK: call
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
; Same shape as call_syncthreads, except that the shared block calls @foo
; rather than the barrier intrinsic. %b is not used by the body.
entry:
  br i1 %cond, label %StoreZero, label %StoreOne

; NOTE(review): nothing in this function branches to this block, so it looks
; unreachable; it is also the only user of %cond2. Confirm it is intentional.
orphan:
  br i1 %cond2, label %Exit, label %StoreZero

Exit:
  ret void

; %cond true: write 0, then go to the shared call block.
StoreZero:
  store i32 0, i32* %a
  br label %CallFoo

; %cond false: write 1, then go to the shared call block.
StoreOne:
  store i32 1, i32* %a
  br label %CallFoo

; Shared tail reached from both store blocks. The test expects two calls in
; the output for this function.
CallFoo:
  call void @foo()
  br label %Exit
}
46