; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,VI,FUNC %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
4; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
5; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
6
7; FUNC-LABEL: {{^}}store_local_i1:
8; SICIVI: s_mov_b32 m0
9; GFX9-NOT: m0
10
11; EG: LDS_BYTE_WRITE
12
13; CM: LDS_BYTE_WRITE
14
15; GCN: ds_write_b8
; Kernel under test. Writes the constant i1 value true through the
; addrspace(3) pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_i1(
    i1 addrspace(3)* %out) {
bb:
  store i1 true, i1 addrspace(3)* %out
  ret void
}
21
22; FUNC-LABEL: {{^}}store_local_i8:
23; SICIVI: s_mov_b32 m0
24; GFX9-NOT: m0
25
26; EG: LDS_BYTE_WRITE
27
28; CM: LDS_BYTE_WRITE
29
30; GCN: ds_write_b8
; Kernel under test. Writes the i8 argument %in through the addrspace(3)
; pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_i8(
    i8 addrspace(3)* %out,
    i8 %in) {
entry:
  store i8 %in, i8 addrspace(3)* %out
  ret void
}
35
36; FUNC-LABEL: {{^}}store_local_i16:
37; SICIVI: s_mov_b32 m0
38; GFX9-NOT: m0
39
40; EG: LDS_SHORT_WRITE
41
42; CM: LDS_SHORT_WRITE
43
44; GCN: ds_write_b16
; Kernel under test. Writes the i16 argument %in through the addrspace(3)
; pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_i16(
    i16 addrspace(3)* %out,
    i16 %in) {
entry:
  store i16 %in, i16 addrspace(3)* %out
  ret void
}
49
50; FUNC-LABEL: {{^}}store_local_v2i16:
51; SICIVI: s_mov_b32 m0
52; GFX9-NOT: m0
53
54; EG: LDS_WRITE
55
56; CM: LDS_WRITE
57
58; GCN: ds_write_b32
; Kernel under test. Writes the <2 x i16> argument %in through the
; addrspace(3) pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_v2i16(
    <2 x i16> addrspace(3)* %out,
    <2 x i16> %in) {
bb:
  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
  ret void
}
64
65; FUNC-LABEL: {{^}}store_local_v4i8:
66; SICIVI: s_mov_b32 m0
67; GFX9-NOT: m0
68
69; EG: LDS_WRITE
70
71; CM: LDS_WRITE
72
73; GCN: ds_write_b32
; Kernel under test. Writes the <4 x i8> argument %in through the
; addrspace(3) pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_v4i8(
    <4 x i8> addrspace(3)* %out,
    <4 x i8> %in) {
bb:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
  ret void
}
79
80; FUNC-LABEL: {{^}}store_local_v4i8_unaligned:
81; SICIVI: s_mov_b32 m0
82; GFX9-NOT: m0
83
84; EG: LDS_BYTE_WRITE
85; EG: LDS_BYTE_WRITE
86; EG: LDS_BYTE_WRITE
87; EG: LDS_BYTE_WRITE
88; EG-NOT: LDS_WRITE
89
90; CM: LDS_BYTE_WRITE
91; CM: LDS_BYTE_WRITE
92; CM: LDS_BYTE_WRITE
93; CM: LDS_BYTE_WRITE
94; CM-NOT: LDS_WRITE
95
96; GCN: ds_write_b8
97; GCN: ds_write_b8
98; GCN: ds_write_b8
99; GCN: ds_write_b8
; Kernel under test. Same <4 x i8> store as the aligned variant, but the
; store is explicitly marked align 1. The checks above expect four
; byte-sized writes.
define amdgpu_kernel void @store_local_v4i8_unaligned(
    <4 x i8> addrspace(3)* %out,
    <4 x i8> %in) {
bb:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 1
  ret void
}
105
106; FUNC-LABEL: {{^}}store_local_v4i8_halfaligned:
107; SICIVI: s_mov_b32 m0
108; GFX9-NOT: m0
109
110; EG: LDS_SHORT_WRITE
111; EG: LDS_SHORT_WRITE
112; EG-NOT: LDS_WRITE
113
114; CM: LDS_SHORT_WRITE
115; CM: LDS_SHORT_WRITE
116; CM-NOT: LDS_WRITE
117
118; GCN: ds_write_b16
119; GCN: ds_write_b16
; Kernel under test. Writes a <4 x i8> value through %out with the store
; explicitly marked align 2. The checks above expect two 16-bit writes.
define amdgpu_kernel void @store_local_v4i8_halfaligned(
    <4 x i8> addrspace(3)* %out,
    <4 x i8> %in) {
bb:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 2
  ret void
}
125
126; FUNC-LABEL: {{^}}store_local_v2i32:
127; SICIVI: s_mov_b32 m0
128; GFX9-NOT: m0
129
130; EG: LDS_WRITE
131; EG: LDS_WRITE
132; EG-NOT: LDS_WRITE
133
134; CM: LDS_WRITE
135; CM: LDS_WRITE
136; CM-NOT: LDS_WRITE
137
138; GCN: ds_write_b64
; Kernel under test. Writes the <2 x i32> argument %in through the
; addrspace(3) pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_v2i32(
    <2 x i32> addrspace(3)* %out,
    <2 x i32> %in) {
bb:
  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
  ret void
}
144
145; FUNC-LABEL: {{^}}store_local_v4i32:
146; SICIVI: s_mov_b32 m0
147; GFX9-NOT: m0
148
149; EG: LDS_WRITE
150; EG: LDS_WRITE
151; EG: LDS_WRITE
152; EG: LDS_WRITE
153
154; CM: LDS_WRITE
155; CM: LDS_WRITE
156; CM: LDS_WRITE
157; CM: LDS_WRITE
158
159; SI: ds_write2_b32
160; VI: ds_write_b128
161; GFX9: ds_write_b128
; Kernel under test. Writes the <4 x i32> argument %in through the
; addrspace(3) pointer %out. The store has no explicit alignment.
define amdgpu_kernel void @store_local_v4i32(
    <4 x i32> addrspace(3)* %out,
    <4 x i32> %in) {
bb:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
  ret void
}
167
168; FUNC-LABEL: {{^}}store_local_v4i32_align4:
169; SICIVI: s_mov_b32 m0
170; GFX9-NOT: m0
171
172; EG: LDS_WRITE
173; EG: LDS_WRITE
174; EG: LDS_WRITE
175; EG: LDS_WRITE
176
177; CM: LDS_WRITE
178; CM: LDS_WRITE
179; CM: LDS_WRITE
180; CM: LDS_WRITE
181
182; GCN: ds_write2_b32
183; GCN: ds_write2_b32
; Kernel under test. Writes a <4 x i32> value through %out with the store
; explicitly marked align 4.
define amdgpu_kernel void @store_local_v4i32_align4(
    <4 x i32> addrspace(3)* %out,
    <4 x i32> %in) {
bb:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
  ret void
}
189
; FUNC-LABEL: {{^}}store_local_i64_i8:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_BYTE_WRITE

; CM: LDS_BYTE_WRITE

; GCN: ds_write_b8

; Kernel under test. Truncates the i64 argument %in to i8 and writes the
; result through the addrspace(3) pointer %out. The cayman run is now
; checked as well, matching the other byte-store kernels in this file.
define amdgpu_kernel void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
  %trunc = trunc i64 %in to i8
  store i8 %trunc, i8 addrspace(3)* %out
  ret void
}
202
; FUNC-LABEL: {{^}}store_local_i64_i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_SHORT_WRITE

; CM: LDS_SHORT_WRITE

; GCN: ds_write_b16

; Kernel under test. Truncates the i64 argument %in to i16 and writes the
; result through the addrspace(3) pointer %out. The cayman run is now
; checked as well, matching the other 16-bit store kernels in this file.
define amdgpu_kernel void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
  %trunc = trunc i64 %in to i16
  store i16 %trunc, i16 addrspace(3)* %out
  ret void
}
215