; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64


;; i8

; Store an i8 through a pointer in address space 1. Both runs must select
; st.global.u8 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
; PTX32-LABEL: st_global_i8(
; PTX32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_i8(
; PTX64: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i8 %a, i8 addrspace(1)* %ptr
  ret void
}

; Store an i8 through a pointer in address space 3. Both runs must select
; st.shared.u8 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
; PTX32-LABEL: st_shared_i8(
; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_i8(
; PTX64: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i8 %a, i8 addrspace(3)* %ptr
  ret void
}

; Store an i8 through a pointer in address space 5. Both runs must select
; st.local.u8 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
; PTX32-LABEL: st_local_i8(
; PTX32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_i8(
; PTX64: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i8 %a, i8 addrspace(5)* %ptr
  ret void
}

;; i16

; Store an i16 through a pointer in address space 1. Both runs must select
; st.global.u16 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
; PTX32-LABEL: st_global_i16(
; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_i16(
; PTX64: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i16 %a, i16 addrspace(1)* %ptr
  ret void
}

; Store an i16 through a pointer in address space 3. Both runs must select
; st.shared.u16 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
; PTX32-LABEL: st_shared_i16(
; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_i16(
; PTX64: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i16 %a, i16 addrspace(3)* %ptr
  ret void
}

; Store an i16 through a pointer in address space 5. Both runs must select
; st.local.u16 with an %rs value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
; PTX32-LABEL: st_local_i16(
; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_i16(
; PTX64: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
  store i16 %a, i16 addrspace(5)* %ptr
  ret void
}

;; i32

; Store an i32 through a pointer in address space 1. Both runs must select
; st.global.u32 with an %r value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
; PTX32-LABEL: st_global_i32(
; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_i32(
; PTX64: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
  store i32 %a, i32 addrspace(1)* %ptr
  ret void
}

; Store an i32 through a pointer in address space 3. Both runs must select
; st.shared.u32 with an %r value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
; PTX32-LABEL: st_shared_i32(
; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_i32(
; PTX64: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
  store i32 %a, i32 addrspace(3)* %ptr
  ret void
}

; Store an i32 through a pointer in address space 5. Both runs must select
; st.local.u32 with an %r value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
; PTX32-LABEL: st_local_i32(
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_i32(
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
; PTX64: ret
  store i32 %a, i32 addrspace(5)* %ptr
  ret void
}

;; i64

; Store an i64 through a pointer in address space 1. Both runs must select
; st.global.u64 with an %rd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
; PTX32-LABEL: st_global_i64(
; PTX32: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_i64(
; PTX64: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
  store i64 %a, i64 addrspace(1)* %ptr
  ret void
}

; Store an i64 through a pointer in address space 3. Both runs must select
; st.shared.u64 with an %rd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
; PTX32-LABEL: st_shared_i64(
; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_i64(
; PTX64: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
  store i64 %a, i64 addrspace(3)* %ptr
  ret void
}

; Store an i64 through a pointer in address space 5. Both runs must select
; st.local.u64 with an %rd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
; PTX32-LABEL: st_local_i64(
; PTX32: st.local.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_i64(
; PTX64: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
; PTX64: ret
  store i64 %a, i64 addrspace(5)* %ptr
  ret void
}

;; f32

; Store a float through a pointer in address space 1. Both runs must select
; st.global.f32 with an %f value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
; PTX32-LABEL: st_global_f32(
; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_f32(
; PTX64: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
  store float %a, float addrspace(1)* %ptr
  ret void
}

; Store a float through a pointer in address space 3. Both runs must select
; st.shared.f32 with an %f value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
; PTX32-LABEL: st_shared_f32(
; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_f32(
; PTX64: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
  store float %a, float addrspace(3)* %ptr
  ret void
}

; Store a float through a pointer in address space 5. Both runs must select
; st.local.f32 with an %f value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
; PTX32-LABEL: st_local_f32(
; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_f32(
; PTX64: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
; PTX64: ret
  store float %a, float addrspace(5)* %ptr
  ret void
}

;; f64

; Store a double through a pointer in address space 1. Both runs must select
; st.global.f64 with an %fd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
; PTX32-LABEL: st_global_f64(
; PTX32: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_global_f64(
; PTX64: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
  store double %a, double addrspace(1)* %ptr
  ret void
}

; Store a double through a pointer in address space 3. Both runs must select
; st.shared.f64 with an %fd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
; PTX32-LABEL: st_shared_f64(
; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_shared_f64(
; PTX64: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
  store double %a, double addrspace(3)* %ptr
  ret void
}

; Store a double through a pointer in address space 5. Both runs must select
; st.local.f64 with an %fd value register; the address register is %r for the
; nvptx run and %rd for the nvptx64 run. The -LABEL lines anchor the checks to
; this function's own output.
define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
; PTX32-LABEL: st_local_f64(
; PTX32: st.local.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
; PTX32: ret
; PTX64-LABEL: st_local_f64(
; PTX64: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
; PTX64: ret
  store double %a, double addrspace(5)* %ptr
  ret void
}
