1 // REQUIRES: nvptx-registered-target
2 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
3 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
4 
// Exercises the __builtin_ptx_read_tid_{x,y,z,w} builtins.
//
// Each builtin is called once, in x, y, z, w order, and the four results are
// summed into the return value so that every result is used. The FileCheck
// directives below expect one call to the matching @llvm.ptx.read.tid.*
// intrinsic per builtin, in that same order.
int read_tid(void) {

// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  return x + y + z + w;

}
20 
// Exercises the __builtin_ptx_read_ntid_{x,y,z,w} builtins.
//
// Each builtin is called once, in x, y, z, w order, and the four results are
// summed into the return value so that every result is used. The FileCheck
// directives below expect one call to the matching @llvm.ptx.read.ntid.*
// intrinsic per builtin, in that same order.
int read_ntid(void) {

// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
// CHECK: call i32 @llvm.ptx.read.ntid.z()
// CHECK: call i32 @llvm.ptx.read.ntid.w()

  int x = __builtin_ptx_read_ntid_x();
  int y = __builtin_ptx_read_ntid_y();
  int z = __builtin_ptx_read_ntid_z();
  int w = __builtin_ptx_read_ntid_w();

  return x + y + z + w;

}
36 
// Exercises the __builtin_ptx_read_ctaid_{x,y,z,w} builtins.
//
// Each builtin is called once, in x, y, z, w order, and the four results are
// summed into the return value so that every result is used. The FileCheck
// directives below expect one call to the matching @llvm.ptx.read.ctaid.*
// intrinsic per builtin, in that same order.
int read_ctaid(void) {

// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
// CHECK: call i32 @llvm.ptx.read.ctaid.w()

  int x = __builtin_ptx_read_ctaid_x();
  int y = __builtin_ptx_read_ctaid_y();
  int z = __builtin_ptx_read_ctaid_z();
  int w = __builtin_ptx_read_ctaid_w();

  return x + y + z + w;

}
52 
// Exercises the __builtin_ptx_read_nctaid_{x,y,z,w} builtins.
//
// Each builtin is called once, in x, y, z, w order, and the four results are
// summed into the return value so that every result is used. The FileCheck
// directives below expect one call to the matching @llvm.ptx.read.nctaid.*
// intrinsic per builtin, in that same order.
int read_nctaid(void) {

// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
// CHECK: call i32 @llvm.ptx.read.nctaid.w()

  int x = __builtin_ptx_read_nctaid_x();
  int y = __builtin_ptx_read_nctaid_y();
  int z = __builtin_ptx_read_nctaid_z();
  int w = __builtin_ptx_read_nctaid_w();

  return x + y + z + w;

}
68 
// Exercises the laneid, warpid, nwarpid, smid, nsmid and gridid read builtins.
//
// Each __builtin_ptx_read_* builtin is called once, in the order listed, and
// the six results are summed into the return value so that every result is
// used. The FileCheck directives below expect one call to the matching
// @llvm.ptx.read.* intrinsic per builtin, in that same order.
int read_ids(void) {

// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
// CHECK: call i32 @llvm.ptx.read.nwarpid()
// CHECK: call i32 @llvm.ptx.read.smid()
// CHECK: call i32 @llvm.ptx.read.nsmid()
// CHECK: call i32 @llvm.ptx.read.gridid()

  int a = __builtin_ptx_read_laneid();
  int b = __builtin_ptx_read_warpid();
  int c = __builtin_ptx_read_nwarpid();
  int d = __builtin_ptx_read_smid();
  int e = __builtin_ptx_read_nsmid();
  int f = __builtin_ptx_read_gridid();

  return a + b + c + d + e + f;

}
88 
// Exercises the __builtin_ptx_read_lanemask_{eq,le,lt,ge,gt} builtins.
//
// Each builtin is called once, in eq, le, lt, ge, gt order, and the five
// results are summed into the return value so that every result is used. The
// FileCheck directives below expect one call to the matching
// @llvm.ptx.read.lanemask.* intrinsic per builtin, in that same order.
int read_lanemasks(void) {

// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()

  int a = __builtin_ptx_read_lanemask_eq();
  int b = __builtin_ptx_read_lanemask_le();
  int c = __builtin_ptx_read_lanemask_lt();
  int d = __builtin_ptx_read_lanemask_ge();
  int e = __builtin_ptx_read_lanemask_gt();

  return a + b + c + d + e;

}
106 
107 
// Exercises the __builtin_ptx_read_clock and __builtin_ptx_read_clock64
// builtins.
//
// Both builtins are called once and their results are added into the return
// value so that each result is used. The FileCheck directives below expect an
// i32 call to @llvm.ptx.read.clock followed by an i64 call to
// @llvm.ptx.read.clock64.
long read_clocks(void) {

// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  // NOTE(review): the intrinsic yields an i64; if `long` is narrower than 64
  // bits for the triple under test this initialization truncates. The test
  // only checks that the call is emitted, so the stored value is irrelevant.
  long b = __builtin_ptx_read_clock64();

  return (long)a + b;

}
119 
// Exercises the __builtin_ptx_read_pm{0,1,2,3} builtins.
//
// Each builtin is called once, in pm0..pm3 order, and the four results are
// summed into the return value so that every result is used. The FileCheck
// directives below expect one call to the matching @llvm.ptx.read.pm*
// intrinsic per builtin, in that same order.
int read_pms(void) {

// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
// CHECK: call i32 @llvm.ptx.read.pm2()
// CHECK: call i32 @llvm.ptx.read.pm3()

  int a = __builtin_ptx_read_pm0();
  int b = __builtin_ptx_read_pm1();
  int c = __builtin_ptx_read_pm2();
  int d = __builtin_ptx_read_pm3();

  return a + b + c + d;

}
135 
// Exercises __builtin_ptx_bar_sync with the constant argument 0.
//
// The FileCheck directive below expects the builtin to be emitted as a call
// to @llvm.ptx.bar.sync with an `i32 0` operand.
void sync(void) {

// CHECK: call void @llvm.ptx.bar.sync(i32 0)

  __builtin_ptx_bar_sync(0);

}
143 
144 
145 // NVVM intrinsics
146 
// The goal is not to test every intrinsic, only to confirm that Clang
// recognizes the builtins defined in BuiltinsNVPTX.def.
// Calls a sample of __nvvm_* builtins (single-precision math, double-precision
// math, then memory/execution barriers) on the given operands. The results are
// stored in locals that are never read; only the emitted intrinsic calls
// matter to the FileCheck directives, which are listed per group in the same
// order as the calls that follow them.
void nvvm_math(float f1, float f2, double d1, double d2) {
  // Single-precision math.
// CHECK: call float @llvm.nvvm.fmax.f
// CHECK: call float @llvm.nvvm.fmin.f
// CHECK: call float @llvm.nvvm.sqrt.rn.f
// CHECK: call float @llvm.nvvm.rcp.rn.f
// CHECK: call float @llvm.nvvm.add.rn.f
  float max_f = __nvvm_fmax_f(f1, f2);
  float min_f = __nvvm_fmin_f(f1, f2);
  float sqrt_f = __nvvm_sqrt_rn_f(f1);
  float rcp_f = __nvvm_rcp_rn_f(f2);
  float sum_f = __nvvm_add_rn_f(f1, f2);

  // Double-precision math.
// CHECK: call double @llvm.nvvm.fmax.d
// CHECK: call double @llvm.nvvm.fmin.d
// CHECK: call double @llvm.nvvm.sqrt.rn.d
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double max_d = __nvvm_fmax_d(d1, d2);
  double min_d = __nvvm_fmin_d(d1, d2);
  double sqrt_d = __nvvm_sqrt_rn_d(d1);
  double rcp_d = __nvvm_rcp_rn_d(d2);

  // Memory barriers and the barrier0 builtin.
// CHECK: call void @llvm.nvvm.membar.cta()
// CHECK: call void @llvm.nvvm.membar.gl()
// CHECK: call void @llvm.nvvm.membar.sys()
// CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_membar_cta();
  __nvvm_membar_gl();
  __nvvm_membar_sys();
  __nvvm_bar0();
}
179