// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
4
// Exercises the __builtin_ptx_read_tid_{x,y,z,w} builtins. The FileCheck
// patterns below expect each builtin to show up in the emitted IR as a call to
// the matching @llvm.ptx.read.tid.* intrinsic returning i32. FileCheck matches
// its patterns in order, so the builtin calls stay in the same x, y, z, w
// sequence as the patterns.
int read_tid(void) {

  // CHECK: call i32 @llvm.ptx.read.tid.x()
  // CHECK: call i32 @llvm.ptx.read.tid.y()
  // CHECK: call i32 @llvm.ptx.read.tid.z()
  // CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  // Returns the sum of the four values read above.
  return x + y + z + w;

}
20
// Exercises the __builtin_ptx_read_ntid_{x,y,z,w} builtins. The FileCheck
// patterns below expect each builtin to show up in the emitted IR as a call to
// the matching @llvm.ptx.read.ntid.* intrinsic returning i32. FileCheck matches
// its patterns in order, so the builtin calls stay in the same x, y, z, w
// sequence as the patterns.
int read_ntid(void) {

  // CHECK: call i32 @llvm.ptx.read.ntid.x()
  // CHECK: call i32 @llvm.ptx.read.ntid.y()
  // CHECK: call i32 @llvm.ptx.read.ntid.z()
  // CHECK: call i32 @llvm.ptx.read.ntid.w()

  int x = __builtin_ptx_read_ntid_x();
  int y = __builtin_ptx_read_ntid_y();
  int z = __builtin_ptx_read_ntid_z();
  int w = __builtin_ptx_read_ntid_w();

  // Returns the sum of the four values read above.
  return x + y + z + w;

}
36
// Exercises the __builtin_ptx_read_ctaid_{x,y,z,w} builtins. The FileCheck
// patterns below expect each builtin to show up in the emitted IR as a call to
// the matching @llvm.ptx.read.ctaid.* intrinsic returning i32. FileCheck
// matches its patterns in order, so the builtin calls stay in the same
// x, y, z, w sequence as the patterns.
int read_ctaid(void) {

  // CHECK: call i32 @llvm.ptx.read.ctaid.x()
  // CHECK: call i32 @llvm.ptx.read.ctaid.y()
  // CHECK: call i32 @llvm.ptx.read.ctaid.z()
  // CHECK: call i32 @llvm.ptx.read.ctaid.w()

  int x = __builtin_ptx_read_ctaid_x();
  int y = __builtin_ptx_read_ctaid_y();
  int z = __builtin_ptx_read_ctaid_z();
  int w = __builtin_ptx_read_ctaid_w();

  // Returns the sum of the four values read above.
  return x + y + z + w;

}
52
// Exercises the __builtin_ptx_read_nctaid_{x,y,z,w} builtins. The FileCheck
// patterns below expect each builtin to show up in the emitted IR as a call to
// the matching @llvm.ptx.read.nctaid.* intrinsic returning i32. FileCheck
// matches its patterns in order, so the builtin calls stay in the same
// x, y, z, w sequence as the patterns.
int read_nctaid(void) {

  // CHECK: call i32 @llvm.ptx.read.nctaid.x()
  // CHECK: call i32 @llvm.ptx.read.nctaid.y()
  // CHECK: call i32 @llvm.ptx.read.nctaid.z()
  // CHECK: call i32 @llvm.ptx.read.nctaid.w()

  int x = __builtin_ptx_read_nctaid_x();
  int y = __builtin_ptx_read_nctaid_y();
  int z = __builtin_ptx_read_nctaid_z();
  int w = __builtin_ptx_read_nctaid_w();

  // Returns the sum of the four values read above.
  return x + y + z + w;

}
68
// Exercises the laneid, warpid, nwarpid, smid, nsmid and gridid
// __builtin_ptx_read_* builtins. The FileCheck patterns below expect each
// builtin to show up in the emitted IR as a call to the matching
// @llvm.ptx.read.* intrinsic returning i32. FileCheck matches its patterns in
// order, so the builtin calls stay in the same sequence as the patterns.
int read_ids(void) {

  // CHECK: call i32 @llvm.ptx.read.laneid()
  // CHECK: call i32 @llvm.ptx.read.warpid()
  // CHECK: call i32 @llvm.ptx.read.nwarpid()
  // CHECK: call i32 @llvm.ptx.read.smid()
  // CHECK: call i32 @llvm.ptx.read.nsmid()
  // CHECK: call i32 @llvm.ptx.read.gridid()

  int a = __builtin_ptx_read_laneid();
  int b = __builtin_ptx_read_warpid();
  int c = __builtin_ptx_read_nwarpid();
  int d = __builtin_ptx_read_smid();
  int e = __builtin_ptx_read_nsmid();
  int f = __builtin_ptx_read_gridid();

  // Returns the sum of the six values read above.
  return a + b + c + d + e + f;

}
88
// Exercises the __builtin_ptx_read_lanemask_{eq,le,lt,ge,gt} builtins. The
// FileCheck patterns below expect each builtin to show up in the emitted IR as
// a call to the matching @llvm.ptx.read.lanemask.* intrinsic returning i32.
// FileCheck matches its patterns in order, so the builtin calls stay in the
// same eq, le, lt, ge, gt sequence as the patterns.
int read_lanemasks(void) {

  // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
  // CHECK: call i32 @llvm.ptx.read.lanemask.le()
  // CHECK: call i32 @llvm.ptx.read.lanemask.lt()
  // CHECK: call i32 @llvm.ptx.read.lanemask.ge()
  // CHECK: call i32 @llvm.ptx.read.lanemask.gt()

  int a = __builtin_ptx_read_lanemask_eq();
  int b = __builtin_ptx_read_lanemask_le();
  int c = __builtin_ptx_read_lanemask_lt();
  int d = __builtin_ptx_read_lanemask_ge();
  int e = __builtin_ptx_read_lanemask_gt();

  // Returns the sum of the five values read above.
  return a + b + c + d + e;

}
106
107
// Exercises __builtin_ptx_read_clock and __builtin_ptx_read_clock64. The
// FileCheck patterns below expect a call to @llvm.ptx.read.clock returning i32
// followed by a call to @llvm.ptx.read.clock64 returning i64. FileCheck matches
// its patterns in order, so the two builtin calls stay in that sequence.
long read_clocks(void) {

  // CHECK: call i32 @llvm.ptx.read.clock()
  // CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  // NOTE(review): the i64 result is stored in a `long`; on the 32-bit nvptx
  // triple `long` is presumably narrower than 64 bits, so the value would be
  // truncated. The patterns only look at the intrinsic calls -- confirm that
  // this is intentional.
  long b = __builtin_ptx_read_clock64();

  // Widen the 32-bit reading before adding it to the 64-bit-clock reading.
  return (long)a + b;

}
119
// Exercises the __builtin_ptx_read_pm{0,1,2,3} builtins. The FileCheck patterns
// below expect each builtin to show up in the emitted IR as a call to the
// matching @llvm.ptx.read.pm* intrinsic returning i32. FileCheck matches its
// patterns in order, so the builtin calls stay in the same pm0..pm3 sequence
// as the patterns.
int read_pms(void) {

  // CHECK: call i32 @llvm.ptx.read.pm0()
  // CHECK: call i32 @llvm.ptx.read.pm1()
  // CHECK: call i32 @llvm.ptx.read.pm2()
  // CHECK: call i32 @llvm.ptx.read.pm3()

  int a = __builtin_ptx_read_pm0();
  int b = __builtin_ptx_read_pm1();
  int c = __builtin_ptx_read_pm2();
  int d = __builtin_ptx_read_pm3();

  // Returns the sum of the four values read above.
  return a + b + c + d;

}
135
// Exercises __builtin_ptx_bar_sync with the constant argument 0. The FileCheck
// pattern below expects it to show up in the emitted IR as a call to
// @llvm.ptx.bar.sync with an i32 0 operand.
void sync(void) {

  // CHECK: call void @llvm.ptx.bar.sync(i32 0)

  __builtin_ptx_bar_sync(0);

}
143
144
// NVVM intrinsics

// The goal is not to test every intrinsic, only that Clang recognizes the
// builtins defined in BuiltinsNVPTX.def.
// Exercises a sample of the __nvvm_* builtins: single- and double-precision
// fmax/fmin/sqrt/rcp, single-precision add, the three memory barriers and
// barrier 0. Each FileCheck pattern sits directly above the builtin call it
// covers and expects a call to the matching @llvm.nvvm.* intrinsic in the
// emitted IR. FileCheck matches its patterns in order, so the builtin calls
// stay in the same sequence as the patterns.
//
// f1, f2: operands for the single-precision builtins.
// d1, d2: operands for the double-precision builtins.
//
// The results are stored in locals that are never read afterwards; the
// patterns only look for the intrinsic calls themselves.
void nvvm_math(float f1, float f2, double d1, double d2) {
  // CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
  // CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
  // CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
  // CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);
  // CHECK: call float @llvm.nvvm.add.rn.f
  float t5 = __nvvm_add_rn_f(f1, f2);

  // CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
  // CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
  // CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
  // CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

  // CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
  // CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
  // CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
  // CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_bar0();
}
179