; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s

; Stores the constant i1 true through an addrspace(3) pointer.
; The verde and tonga runs expect an s_mov_b32 into m0; the gfx900 run expects
; no mention of m0 ahead of the store. Every run expects a byte-sized LDS write.
; FUNC-LABEL: {{^}}store_local_i1:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_BYTE_WRITE

; CM: LDS_BYTE_WRITE

; GCN: ds_write_b8
define amdgpu_kernel void @store_local_i1(i1 addrspace(3)* %out) {
entry:
  store i1 true, i1 addrspace(3)* %out
  ret void
}

; Stores an i8 kernel argument through an addrspace(3) pointer.
; The verde and tonga runs expect an s_mov_b32 into m0; the gfx900 run expects
; no mention of m0 ahead of the store. Every run expects a byte-sized LDS write.
; FUNC-LABEL: {{^}}store_local_i8:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_BYTE_WRITE

; CM: LDS_BYTE_WRITE

; GCN: ds_write_b8
define amdgpu_kernel void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
  store i8 %in, i8 addrspace(3)* %out
  ret void
}

; Stores an i16 kernel argument through an addrspace(3) pointer.
; The verde and tonga runs expect an s_mov_b32 into m0; the gfx900 run expects
; no mention of m0 ahead of the store. Every run expects a 16-bit LDS write.
; FUNC-LABEL: {{^}}store_local_i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_SHORT_WRITE

; CM: LDS_SHORT_WRITE

; GCN: ds_write_b16
define amdgpu_kernel void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
  store i16 %in, i16 addrspace(3)* %out
  ret void
}

; Stores a <2 x i16> kernel argument through an addrspace(3) pointer with the
; default alignment. The whole vector is expected to go out as one 32-bit LDS
; write on every run; the m0 expectations match the scalar tests.
; FUNC-LABEL: {{^}}store_local_v2i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRITE

; CM: LDS_WRITE

; GCN: ds_write_b32
define amdgpu_kernel void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
  ret void
}

; Stores a <4 x i8> kernel argument through an addrspace(3) pointer with the
; default alignment. The whole vector is expected to go out as one 32-bit LDS
; write on every run; the m0 expectations match the scalar tests.
; FUNC-LABEL: {{^}}store_local_v4i8:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRITE

; CM: LDS_WRITE

; GCN: ds_write_b32
define amdgpu_kernel void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
  ret void
}

; Stores a <4 x i8> kernel argument through an addrspace(3) pointer that is
; only 1-byte aligned. Every run expects four byte-sized LDS writes, and the
; redwood and cayman runs additionally reject any dword LDS_WRITE after them.
; FUNC-LABEL: {{^}}store_local_v4i8_unaligned:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_BYTE_WRITE
; EG: LDS_BYTE_WRITE
; EG: LDS_BYTE_WRITE
; EG: LDS_BYTE_WRITE
; EG-NOT: LDS_WRITE

; CM: LDS_BYTE_WRITE
; CM: LDS_BYTE_WRITE
; CM: LDS_BYTE_WRITE
; CM: LDS_BYTE_WRITE
; CM-NOT: LDS_WRITE

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
define amdgpu_kernel void @store_local_v4i8_unaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 1
  ret void
}

; Stores a <4 x i8> kernel argument through an addrspace(3) pointer that is
; only 2-byte aligned. Every run expects two 16-bit LDS writes, and the
; redwood and cayman runs additionally reject any dword LDS_WRITE after them.
; FUNC-LABEL: {{^}}store_local_v4i8_halfaligned:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_SHORT_WRITE
; EG: LDS_SHORT_WRITE
; EG-NOT: LDS_WRITE

; CM: LDS_SHORT_WRITE
; CM: LDS_SHORT_WRITE
; CM-NOT: LDS_WRITE

; GCN: ds_write_b16
; GCN: ds_write_b16
define amdgpu_kernel void @store_local_v4i8_halfaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 2
  ret void
}

; Stores a <2 x i32> kernel argument through an addrspace(3) pointer with the
; default alignment. The redwood and cayman runs expect exactly two dword LDS
; writes (a third is rejected); the amdgcn runs expect one 64-bit LDS write.
; FUNC-LABEL: {{^}}store_local_v2i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRITE
; EG: LDS_WRITE
; EG-NOT: LDS_WRITE

; CM: LDS_WRITE
; CM: LDS_WRITE
; CM-NOT: LDS_WRITE

; GCN: ds_write_b64
define amdgpu_kernel void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
  ret void
}

; Stores a <4 x i32> kernel argument through an addrspace(3) pointer with the
; default alignment. The redwood and cayman runs expect four dword LDS writes;
; the amdgcn runs expect a ds_write2_b64.
; FUNC-LABEL: {{^}}store_local_v4i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE

; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE

; GCN: ds_write2_b64
define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
  ret void
}

; Stores a <4 x i32> kernel argument through an addrspace(3) pointer that is
; only 4-byte aligned. The redwood and cayman runs expect four dword LDS
; writes; the amdgcn runs expect two ds_write2_b32 instructions.
; FUNC-LABEL: {{^}}store_local_v4i32_align4:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE

; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE

; GCN: ds_write2_b32
; GCN: ds_write2_b32
define amdgpu_kernel void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
  ret void
}

; Truncates an i64 kernel argument to i8 and stores it through an
; addrspace(3) pointer. The redwood and amdgcn runs expect a single byte-sized
; LDS write.
; NOTE(review) unlike the sibling tests, the cayman run only has its label
; checked here - confirm whether a cayman-specific expectation was intended.
; FUNC-LABEL: {{^}}store_local_i64_i8:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_BYTE_WRITE
; GCN: ds_write_b8
define amdgpu_kernel void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
  %trunc = trunc i64 %in to i8
  store i8 %trunc, i8 addrspace(3)* %out
  ret void
}

; Truncates an i64 kernel argument to i16 and stores it through an
; addrspace(3) pointer. The redwood and amdgcn runs expect a single 16-bit
; LDS write.
; NOTE(review) unlike the sibling tests, the cayman run only has its label
; checked here - confirm whether a cayman-specific expectation was intended.
; FUNC-LABEL: {{^}}store_local_i64_i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_SHORT_WRITE
; GCN: ds_write_b16
define amdgpu_kernel void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
  %trunc = trunc i64 %in to i16
  store i16 %trunc, i16 addrspace(3)* %out
  ret void
}
