1; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
2
3target triple = "nvptx-nvidia-cuda"
4
5; Function Attrs: nounwind
6; CHECK: .entry foo
; Kernel under test (marked "kernel" in !nvvm.annotations): reads the float
; stored at %a, runs it through the internal helper @__nv_fast_tanf, and
; writes the result back to the same location.
define void @foo(float* nocapture %a) #0 {
entry:
  %input = load float, float* %a
  ; The call must stay fastcc to match the callee's calling convention.
  %result = tail call fastcc float @__nv_fast_tanf(float %input)
  store float %result, float* %a
  ret void
}
13
14; Function Attrs: nounwind readnone
15declare float @llvm.nvvm.sin.approx.ftz.f(float) #1
16
17; Function Attrs: nounwind readnone
18declare float @llvm.nvvm.cos.approx.ftz.f(float) #1
19
20; Function Attrs: nounwind readnone
21declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1
22
23; Function Attrs: alwaysinline inlinehint nounwind readnone
24; CHECK: .func (.param .b32 func_retval0) __nv_fast_tanf
; Internal fastcc helper: returns the quotient of the NVVM approx/ftz sine and
; cosine of %a, computed with the approx/ftz division intrinsic.
; The function carries attribute group #2, yet the test expects llc to emit it
; as a separate .func rather than dropping it.
define internal fastcc float @__nv_fast_tanf(float %a) #2 {
entry:
  %sine = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a)
  %cosine = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a)
  %quotient = tail call float @llvm.nvvm.div.approx.ftz.f(float %sine, float %cosine)
  ret float %quotient
}
32
33attributes #0 = { nounwind }
34attributes #1 = { nounwind readnone }
35attributes #2 = { alwaysinline inlinehint nounwind readnone }
36
37!nvvm.annotations = !{!0}
38
39!0 = !{void (float*)* @foo, !"kernel", i32 1}
40