; RUN: llc -O2 -tail-dup-size=100 -enable-tail-merge=0 < %s | FileCheck %s
target triple = "nvptx64-nvidia-cuda"

; Both functions below share one CFG shape: L1 and L2 each store to %a and
; then jump to the common tail block L42, which makes a single call and
; branches to Ret. The RUN line raises the tail-duplication size limit and
; turns tail merging off, so the shared tail is a duplication candidate and
; any copies are not folded back together afterwards.

declare void @foo()
declare void @llvm.nvvm.barrier0()

; syncthreads shouldn't be duplicated: exactly one bar.sync may appear in the
; output for this function.
; CHECK: .func call_syncthreads
; CHECK: bar.sync
; CHECK-NOT: bar.sync
define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; NOTE(review): this branch follows a terminator, so it opens a new unnamed
  ; block that nothing jumps to; it looks unreachable. Kept unchanged so the
  ; input CFG stays exactly as the test was written.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Shared tail: the barrier call that must stay a single instance.
  call void @llvm.nvvm.barrier0()
  br label %Ret
}

; Control case: the same CFG as call_syncthreads, but with an ordinary call
; to @foo in the shared tail. Two calls are expected in the output, which
; shows that this CFG shape really does trigger tail duplication and that the
; single bar.sync above is due to the barrier itself.
; CHECK: .func call_foo
; CHECK: call
; CHECK: call
define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
  br i1 %cond, label %L1, label %L2
  ; NOTE(review): as in call_syncthreads, this branch sits after a terminator
  ; in an unnamed block with no predecessors; it looks unreachable.
  br i1 %cond2, label %Ret, label %L1
Ret:
  ret void
L1:
  store i32 0, i32* %a
  br label %L42
L2:
  store i32 1, i32* %a
  br label %L42
L42:
  ; Shared tail: a plain call that tail duplication is free to copy.
  call void @foo()
  br label %Ret
}