1; RUN: llc < %s -march=ptx32 | FileCheck %s
2
; External array declarations used by the global-address load tests in this
; file. Each CHECK line pins the PTX declaration expected for the global that
; follows it. The expected output declares every array as a .b8 byte array
; whose length is the total byte size (10 elements times the element size).
;
; Expected state-space selection, as encoded in the CHECK lines:
;   no address space  -> .global
;   addrspace(1)      -> .const
;   addrspace(4)      -> .shared

; --- i16 elements: 10 x 2 bytes = 20 bytes ---
; CHECK: .extern .global .b8 array_i16[20];
@array_i16 = external global [10 x i16]

; CHECK: .extern .const .b8 array_constant_i16[20];
@array_constant_i16 = external addrspace(1) constant [10 x i16]

; CHECK: .extern .shared .b8 array_shared_i16[20];
@array_shared_i16 = external addrspace(4) global [10 x i16]

; --- i32 elements: 10 x 4 bytes = 40 bytes ---
; CHECK: .extern .global .b8 array_i32[40];
@array_i32 = external global [10 x i32]

; CHECK: .extern .const .b8 array_constant_i32[40];
@array_constant_i32 = external addrspace(1) constant [10 x i32]

; CHECK: .extern .shared .b8 array_shared_i32[40];
@array_shared_i32 = external addrspace(4) global [10 x i32]

; --- i64 elements: 10 x 8 bytes = 80 bytes ---
; CHECK: .extern .global .b8 array_i64[80];
@array_i64 = external global [10 x i64]

; CHECK: .extern .const .b8 array_constant_i64[80];
@array_constant_i64 = external addrspace(1) constant [10 x i64]

; CHECK: .extern .shared .b8 array_shared_i64[80];
@array_shared_i64 = external addrspace(4) global [10 x i64]

; --- float elements: 10 x 4 bytes = 40 bytes ---
; CHECK: .extern .global .b8 array_float[40];
@array_float = external global [10 x float]

; CHECK: .extern .const .b8 array_constant_float[40];
@array_constant_float = external addrspace(1) constant [10 x float]

; CHECK: .extern .shared .b8 array_shared_float[40];
@array_shared_float = external addrspace(4) global [10 x float]

; --- double elements: 10 x 8 bytes = 80 bytes ---
; CHECK: .extern .global .b8 array_double[80];
@array_double = external global [10 x double]

; CHECK: .extern .const .b8 array_constant_double[80];
@array_constant_double = external addrspace(1) constant [10 x double]

; CHECK: .extern .shared .b8 array_shared_double[80];
@array_shared_double = external addrspace(4) global [10 x double]
47
48
; t1_u16: load an i16 straight through the pointer argument.
; The pointer has no address-space qualifier, so the test expects a
; ld.global.u16 whose address is a bare register with no immediate offset,
; followed by the function's ret.
define ptx_device i16 @t1_u16(i16* %p) {
entry:
; CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %val = load i16* %p
  ret i16 %val
}
56
; t1_u32: load an i32 straight through the pointer argument.
; Expects ld.global.u32 with a bare register address (no immediate offset),
; followed by ret.
define ptx_device i32 @t1_u32(i32* %p) {
entry:
; CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %val = load i32* %p
  ret i32 %val
}
64
; t1_u64: load an i64 straight through the pointer argument.
; Expects ld.global.u64 with a bare register address (no immediate offset),
; followed by ret.
define ptx_device i64 @t1_u64(i64* %p) {
entry:
; CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %val = load i64* %p
  ret i64 %val
}
72
; t1_f32: load a float straight through the pointer argument.
; Expects ld.global.f32 with a bare register address (no immediate offset),
; followed by ret.
define ptx_device float @t1_f32(float* %p) {
entry:
; CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %val = load float* %p
  ret float %val
}
80
; t1_f64: load a double straight through the pointer argument.
; Expects ld.global.f64 with a bare register address (no immediate offset),
; followed by ret.
define ptx_device double @t1_f64(double* %p) {
entry:
; CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %val = load double* %p
  ret double %val
}
88
; t2_u16: load the i16 at constant element index 1 from the pointer argument.
; The test expects the constant index to appear as an immediate byte offset
; on the load address: +2, i.e. one i16 element.
define ptx_device i16 @t2_u16(i16* %p) {
entry:
; CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
; CHECK: ret;
  %addr = getelementptr i16* %p, i32 1
  %val = load i16* %addr
  ret i16 %val
}
97
; t2_u32: load the i32 at constant element index 1 from the pointer argument.
; The test expects the constant index to appear as an immediate byte offset
; on the load address: +4, i.e. one i32 element.
define ptx_device i32 @t2_u32(i32* %p) {
entry:
; CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
; CHECK: ret;
  %addr = getelementptr i32* %p, i32 1
  %val = load i32* %addr
  ret i32 %val
}
106
; t2_u64: load the i64 at constant element index 1 from the pointer argument.
; The test expects the constant index to appear as an immediate byte offset
; on the load address: +8, i.e. one i64 element.
define ptx_device i64 @t2_u64(i64* %p) {
entry:
; CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
; CHECK: ret;
  %addr = getelementptr i64* %p, i32 1
  %val = load i64* %addr
  ret i64 %val
}
115
; t2_f32: load the float at constant element index 1 from the pointer argument.
; The test expects the constant index to appear as an immediate byte offset
; on the load address: +4, i.e. one float element.
define ptx_device float @t2_f32(float* %p) {
entry:
; CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
; CHECK: ret;
  %addr = getelementptr float* %p, i32 1
  %val = load float* %addr
  ret float %val
}
124
; t2_f64: load the double at constant element index 1 from the pointer
; argument. The test expects the constant index to appear as an immediate
; byte offset on the load address: +8, i.e. one double element.
define ptx_device double @t2_f64(double* %p) {
entry:
; CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
; CHECK: ret;
  %addr = getelementptr double* %p, i32 1
  %val = load double* %addr
  ret double %val
}
133
; t3_u16: load the i16 at a run-time element index %q from pointer %p.
; Expected code shape:
;   1. the index is scaled to a byte offset with a left shift by 1 (x2),
;      and the shift result is captured as R0;
;   2. R0 is added to another register to form the address;
;   3. the load uses a bare register address with no immediate offset;
;   4. the function returns (same trailing ret check as the other load tests
;      in this file, so the end of this function's output is verified too).
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
; CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
; CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %addr = getelementptr i16* %p, i32 %q
  %val = load i16* %addr
  ret i16 %val
}
143
; t3_u32: load the i32 at a run-time element index %q from pointer %p.
; Expected code shape:
;   1. the index is scaled to a byte offset with a left shift by 2 (x4),
;      and the shift result is captured as R0;
;   2. R0 is added to another register to form the address;
;   3. the load uses a bare register address with no immediate offset;
;   4. the function returns (same trailing ret check as the other load tests
;      in this file, so the end of this function's output is verified too).
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
; CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
; CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
; CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %addr = getelementptr i32* %p, i32 %q
  %val = load i32* %addr
  ret i32 %val
}
153
; t3_u64: load the i64 at a run-time element index %q from pointer %p.
; Expected code shape:
;   1. the index is scaled to a byte offset with a left shift by 3 (x8),
;      and the shift result is captured as R0;
;   2. R0 is added to another register to form the address;
;   3. the load uses a bare register address with no immediate offset;
;   4. the function returns (same trailing ret check as the other load tests
;      in this file, so the end of this function's output is verified too).
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
; CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
; CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
; CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %addr = getelementptr i64* %p, i32 %q
  %val = load i64* %addr
  ret i64 %val
}
163
; t3_f32: load the float at a run-time element index %q from pointer %p.
; Expected code shape:
;   1. the index is scaled to a byte offset with a left shift by 2 (x4),
;      and the shift result is captured as R0;
;   2. R0 is added to another register to form the address;
;   3. the load uses a bare register address with no immediate offset;
;   4. the function returns (same trailing ret check as the other load tests
;      in this file, so the end of this function's output is verified too).
define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
; CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
; CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
; CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %addr = getelementptr float* %p, i32 %q
  %val = load float* %addr
  ret float %val
}
173
; t3_f64: load the double at a run-time element index %q from pointer %p.
; Expected code shape:
;   1. the index is scaled to a byte offset with a left shift by 3 (x8),
;      and the shift result is captured as R0;
;   2. R0 is added to another register to form the address;
;   3. the load uses a bare register address with no immediate offset;
;   4. the function returns (same trailing ret check as the other load tests
;      in this file, so the end of this function's output is verified too).
define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
; CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
; CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
; CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
; CHECK: ret;
  %addr = getelementptr double* %p, i32 %q
  %val = load double* %addr
  ret double %val
}
183
; t4_global_u16: load element 0 of the external array @array_i16 (no address
; space qualifier). Expects the symbol address to be moved into a register
; (captured as R0) and a ld.global.u16 through that same register with no
; offset, followed by ret.
define ptx_device i16 @t4_global_u16() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
; CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
  %val = load i16* %addr
  ret i16 %val
}
193
; t4_global_u32: load element 0 of the external array @array_i32 (no address
; space qualifier). Expects the symbol address to be moved into a register
; (captured as R0) and a ld.global.u32 through that same register with no
; offset, followed by ret.
define ptx_device i32 @t4_global_u32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
; CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
  %val = load i32* %addr
  ret i32 %val
}
203
; t4_global_u64: load element 0 of the external array @array_i64 (no address
; space qualifier). Expects the symbol address to be moved into a register
; (captured as R0) and a ld.global.u64 through that same register with no
; offset, followed by ret.
define ptx_device i64 @t4_global_u64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
; CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
  %val = load i64* %addr
  ret i64 %val
}
213
; t4_global_f32: load element 0 of the external array @array_float (no
; address space qualifier). Expects the symbol address to be moved into a
; register (captured as R0) and a ld.global.f32 through that same register
; with no offset, followed by ret.
define ptx_device float @t4_global_f32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
; CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x float]* @array_float, i32 0, i32 0
  %val = load float* %addr
  ret float %val
}
223
; t4_global_f64: load element 0 of the external array @array_double (no
; address space qualifier). Expects the symbol address to be moved into a
; register (captured as R0) and a ld.global.f64 through that same register
; with no offset, followed by ret.
define ptx_device double @t4_global_f64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
; CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x double]* @array_double, i32 0, i32 0
  %val = load double* %addr
  ret double %val
}
233
; t4_const_u16: load element 0 of @array_constant_i16, which lives in
; addrspace(1). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .const
; state space (ld.const.u16), followed by ret.
define ptx_device i16 @t4_const_u16() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
; CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
  %val = load i16 addrspace(1)* %addr
  ret i16 %val
}
243
; t4_const_u32: load element 0 of @array_constant_i32, which lives in
; addrspace(1). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .const
; state space (ld.const.u32), followed by ret.
define ptx_device i32 @t4_const_u32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
; CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
  %val = load i32 addrspace(1)* %addr
  ret i32 %val
}
253
; t4_const_u64: load element 0 of @array_constant_i64, which lives in
; addrspace(1). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .const
; state space (ld.const.u64), followed by ret.
define ptx_device i64 @t4_const_u64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
; CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
  %val = load i64 addrspace(1)* %addr
  ret i64 %val
}
263
; t4_const_f32: load element 0 of @array_constant_float, which lives in
; addrspace(1). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .const
; state space (ld.const.f32), followed by ret.
define ptx_device float @t4_const_f32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
; CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
  %val = load float addrspace(1)* %addr
  ret float %val
}
273
; t4_const_f64: load element 0 of @array_constant_double, which lives in
; addrspace(1). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .const
; state space (ld.const.f64), followed by ret.
define ptx_device double @t4_const_f64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
; CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
  %val = load double addrspace(1)* %addr
  ret double %val
}
283
; t4_shared_u16: load element 0 of @array_shared_i16, which lives in
; addrspace(4). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .shared
; state space (ld.shared.u16), followed by ret.
define ptx_device i16 @t4_shared_u16() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
; CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
  %val = load i16 addrspace(4)* %addr
  ret i16 %val
}
293
; t4_shared_u32: load element 0 of @array_shared_i32, which lives in
; addrspace(4). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .shared
; state space (ld.shared.u32), followed by ret.
define ptx_device i32 @t4_shared_u32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
; CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
  %val = load i32 addrspace(4)* %addr
  ret i32 %val
}
303
; t4_shared_u64: load element 0 of @array_shared_i64, which lives in
; addrspace(4). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .shared
; state space (ld.shared.u64), followed by ret.
define ptx_device i64 @t4_shared_u64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
; CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
  %val = load i64 addrspace(4)* %addr
  ret i64 %val
}
313
; t4_shared_f32: load element 0 of @array_shared_float, which lives in
; addrspace(4). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .shared
; state space (ld.shared.f32), followed by ret.
define ptx_device float @t4_shared_f32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
; CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
  %val = load float addrspace(4)* %addr
  ret float %val
}
323
; t4_shared_f64: load element 0 of @array_shared_double, which lives in
; addrspace(4). Expects the symbol address to be moved into a register
; (captured as R0) and the load through that register to use the .shared
; state space (ld.shared.f64), followed by ret.
define ptx_device double @t4_shared_f64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
; CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
; CHECK: ret;
  %addr = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
  %val = load double addrspace(4)* %addr
  ret double %val
}
333
; t5_u16: load element 1 of the external array @array_i16.
; Expects the symbol address to be moved into a register (captured as R0)
; and the constant index to appear as an immediate byte offset of +2 on a
; ld.global.u16 through that same register, followed by ret.
define ptx_device i16 @t5_u16() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
; CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
; CHECK: ret;
  %addr = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
  %val = load i16* %addr
  ret i16 %val
}
343
; t5_u32: load element 1 of the external array @array_i32.
; Expects the symbol address to be moved into a register (captured as R0)
; and the constant index to appear as an immediate byte offset of +4 on a
; ld.global.u32 through that same register, followed by ret.
define ptx_device i32 @t5_u32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
; CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
; CHECK: ret;
  %addr = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
  %val = load i32* %addr
  ret i32 %val
}
353
; t5_u64: load element 1 of the external array @array_i64.
; Expects the symbol address to be moved into a register (captured as R0)
; and the constant index to appear as an immediate byte offset of +8 on a
; ld.global.u64 through that same register, followed by ret.
define ptx_device i64 @t5_u64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
; CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
; CHECK: ret;
  %addr = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
  %val = load i64* %addr
  ret i64 %val
}
363
; t5_f32: load element 1 of the external array @array_float.
; Expects the symbol address to be moved into a register (captured as R0)
; and the constant index to appear as an immediate byte offset of +4 on a
; ld.global.f32 through that same register, followed by ret.
define ptx_device float @t5_f32() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
; CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
; CHECK: ret;
  %addr = getelementptr [10 x float]* @array_float, i32 0, i32 1
  %val = load float* %addr
  ret float %val
}
373
; t5_f64: load element 1 of the external array @array_double.
; Expects the symbol address to be moved into a register (captured as R0)
; and the constant index to appear as an immediate byte offset of +8 on a
; ld.global.f64 through that same register, followed by ret.
define ptx_device double @t5_f64() {
entry:
; CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
; CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
; CHECK: ret;
  %addr = getelementptr [10 x double]* @array_double, i32 0, i32 1
  %val = load double* %addr
  ret double %val
}
383