1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,HAWAII %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FIJI %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5
; Non-kernel function storing a 56-bit integer through an addrspace(3) pointer.
; The expected code writes the value in three pieces: a 32-bit write at
; offset 0, a 16-bit write at offset 4 and an 8-bit write at offset 6.
; The hawaii/fiji checks shift the upper register right by 16 to obtain the
; last byte and set m0 to -1 first; the gfx900 checks instead use
; ds_write_b8_d16_hi on the unshifted register and contain no m0 write.
6define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 {
7; CIVI-LABEL: local_store_i56:
8; CIVI:       ; %bb.0:
9; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; CIVI-NEXT:    s_mov_b32 m0, -1
11; CIVI-NEXT:    ds_write_b16 v0, v2 offset:4
12; CIVI-NEXT:    ds_write_b32 v0, v1
13; CIVI-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
14; CIVI-NEXT:    ds_write_b8 v0, v1 offset:6
15; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
16; CIVI-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX9-LABEL: local_store_i56:
19; GFX9:       ; %bb.0:
20; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX9-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
22; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
23; GFX9-NEXT:    ds_write_b32 v0, v1
24; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
25; GFX9-NEXT:    s_setpc_b64 s[30:31]
26  store i56 %arg, i56 addrspace(3)* %ptr, align 8
27  ret void
28}
29
; Kernel storing a 55-bit argument through an addrspace(3) pointer. All three
; targets are expected to emit a 32-bit write at offset 0, a 16-bit write at
; offset 4 and an 8-bit write at offset 6 whose value is limited to 7 bits
; (32 + 16 + 7 = 55): hawaii masks with 0x7f, fiji uses v_bfe_u32 ..., 16, 7,
; and gfx900 masks with 0x7fffff before ds_write_b8_d16_hi.
; The checks also pin a separate ubyte load of byte 14 relative to s[4:5]
; (s_or_b32 with 14 on hawaii/fiji, offset:14 on gfx900); s[4:5] is presumably
; the kernarg segment pointer -- confirm against the calling convention.
30define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 {
31; HAWAII-LABEL: local_store_i55:
32; HAWAII:       ; %bb.0:
33; HAWAII-NEXT:    s_or_b32 s0, s4, 14
34; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
35; HAWAII-NEXT:    v_mov_b32_e32 v1, s5
36; HAWAII-NEXT:    flat_load_ubyte v0, v[0:1]
37; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
38; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
39; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
40; HAWAII-NEXT:    s_mov_b32 m0, -1
41; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
42; HAWAII-NEXT:    v_mov_b32_e32 v1, s0
43; HAWAII-NEXT:    v_mov_b32_e32 v3, s2
44; HAWAII-NEXT:    v_mov_b32_e32 v2, s1
45; HAWAII-NEXT:    ds_write_b16 v1, v3 offset:4
46; HAWAII-NEXT:    s_waitcnt vmcnt(0)
47; HAWAII-NEXT:    v_and_b32_e32 v0, 0x7f, v0
48; HAWAII-NEXT:    ds_write_b8 v1, v0 offset:6
49; HAWAII-NEXT:    ds_write_b32 v1, v2
50; HAWAII-NEXT:    s_endpgm
51;
52; FIJI-LABEL: local_store_i55:
53; FIJI:       ; %bb.0:
54; FIJI-NEXT:    s_or_b32 s0, s4, 14
55; FIJI-NEXT:    v_mov_b32_e32 v0, s0
56; FIJI-NEXT:    v_mov_b32_e32 v1, s5
57; FIJI-NEXT:    flat_load_ubyte v0, v[0:1]
58; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
59; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
60; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
61; FIJI-NEXT:    s_mov_b32 m0, -1
62; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
63; FIJI-NEXT:    v_mov_b32_e32 v1, s0
64; FIJI-NEXT:    v_mov_b32_e32 v3, s1
65; FIJI-NEXT:    s_and_b32 s3, s2, 0xffff
66; FIJI-NEXT:    v_mov_b32_e32 v2, s2
67; FIJI-NEXT:    ds_write_b16 v1, v2 offset:4
68; FIJI-NEXT:    s_waitcnt vmcnt(0)
69; FIJI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
70; FIJI-NEXT:    v_or_b32_e32 v0, s3, v0
71; FIJI-NEXT:    v_bfe_u32 v0, v0, 16, 7
72; FIJI-NEXT:    ds_write_b8 v1, v0 offset:6
73; FIJI-NEXT:    ds_write_b32 v1, v3
74; FIJI-NEXT:    s_endpgm
75;
76; GFX9-LABEL: local_store_i55:
77; GFX9:       ; %bb.0:
78; GFX9-NEXT:    v_mov_b32_e32 v0, 0
79; GFX9-NEXT:    global_load_ubyte_d16_hi v0, v0, s[4:5] offset:14
80; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
81; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
82; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
83; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
84; GFX9-NEXT:    s_and_b32 s3, s2, 0xffff
85; GFX9-NEXT:    v_mov_b32_e32 v1, s0
86; GFX9-NEXT:    v_mov_b32_e32 v2, s2
87; GFX9-NEXT:    v_mov_b32_e32 v3, s1
88; GFX9-NEXT:    ds_write_b16 v1, v2 offset:4
89; GFX9-NEXT:    s_waitcnt vmcnt(0)
90; GFX9-NEXT:    v_or_b32_e32 v0, s3, v0
91; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffff, v0
92; GFX9-NEXT:    ds_write_b8_d16_hi v1, v0 offset:6
93; GFX9-NEXT:    ds_write_b32 v1, v3
94; GFX9-NEXT:    s_endpgm
95  store i55 %arg, i55 addrspace(3)* %ptr, align 8
96  ret void
97}
98
; Kernel storing a 48-bit argument through an addrspace(3) pointer. Every
; target is expected to split it into a 32-bit write at offset 0 and a 16-bit
; write at offset 4; the checks contain no masking or shifting instructions.
; The hawaii/fiji sequences additionally set m0 to -1 before the DS writes.
99define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 {
100; HAWAII-LABEL: local_store_i48:
101; HAWAII:       ; %bb.0:
102; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
103; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
104; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
105; HAWAII-NEXT:    s_mov_b32 m0, -1
106; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
107; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
108; HAWAII-NEXT:    v_mov_b32_e32 v1, s2
109; HAWAII-NEXT:    ds_write_b16 v0, v1 offset:4
110; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
111; HAWAII-NEXT:    ds_write_b32 v0, v1
112; HAWAII-NEXT:    s_endpgm
113;
114; FIJI-LABEL: local_store_i48:
115; FIJI:       ; %bb.0:
116; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
117; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
118; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
119; FIJI-NEXT:    s_mov_b32 m0, -1
120; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
121; FIJI-NEXT:    v_mov_b32_e32 v0, s0
122; FIJI-NEXT:    v_mov_b32_e32 v1, s2
123; FIJI-NEXT:    ds_write_b16 v0, v1 offset:4
124; FIJI-NEXT:    v_mov_b32_e32 v1, s1
125; FIJI-NEXT:    ds_write_b32 v0, v1
126; FIJI-NEXT:    s_endpgm
127;
128; GFX9-LABEL: local_store_i48:
129; GFX9:       ; %bb.0:
130; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
131; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
132; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
133; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
134; GFX9-NEXT:    v_mov_b32_e32 v0, s0
135; GFX9-NEXT:    v_mov_b32_e32 v2, s1
136; GFX9-NEXT:    v_mov_b32_e32 v1, s2
137; GFX9-NEXT:    ds_write_b16 v0, v1 offset:4
138; GFX9-NEXT:    ds_write_b32 v0, v2
139; GFX9-NEXT:    s_endpgm
140  store i48 %arg, i48 addrspace(3)* %ptr, align 8
141  ret void
142}
143
; Kernel storing a 65-bit argument through an addrspace(3) pointer. Every
; target is expected to emit one 64-bit write at offset 0 (ds_write_b64) plus
; an 8-bit write at offset 8. The byte written at offset 8 is first masked
; with 1 (s_and_b32 ..., 1), so only the single bit above the low 64 is kept.
144define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 {
145; HAWAII-LABEL: local_store_i65:
146; HAWAII:       ; %bb.0:
147; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x0
148; HAWAII-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
149; HAWAII-NEXT:    s_load_dword s3, s[4:5], 0x4
150; HAWAII-NEXT:    s_mov_b32 m0, -1
151; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
152; HAWAII-NEXT:    v_mov_b32_e32 v2, s2
153; HAWAII-NEXT:    s_and_b32 s3, s3, 1
154; HAWAII-NEXT:    v_mov_b32_e32 v0, s3
155; HAWAII-NEXT:    ds_write_b8 v2, v0 offset:8
156; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
157; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
158; HAWAII-NEXT:    ds_write_b64 v2, v[0:1]
159; HAWAII-NEXT:    s_endpgm
160;
161; FIJI-LABEL: local_store_i65:
162; FIJI:       ; %bb.0:
163; FIJI-NEXT:    s_load_dword s2, s[4:5], 0x0
164; FIJI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
165; FIJI-NEXT:    s_load_dword s3, s[4:5], 0x10
166; FIJI-NEXT:    s_mov_b32 m0, -1
167; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
168; FIJI-NEXT:    v_mov_b32_e32 v2, s2
169; FIJI-NEXT:    s_and_b32 s3, s3, 1
170; FIJI-NEXT:    v_mov_b32_e32 v0, s3
171; FIJI-NEXT:    ds_write_b8 v2, v0 offset:8
172; FIJI-NEXT:    v_mov_b32_e32 v0, s0
173; FIJI-NEXT:    v_mov_b32_e32 v1, s1
174; FIJI-NEXT:    ds_write_b64 v2, v[0:1]
175; FIJI-NEXT:    s_endpgm
176;
177; GFX9-LABEL: local_store_i65:
178; GFX9:       ; %bb.0:
179; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x0
180; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
181; GFX9-NEXT:    s_load_dword s3, s[4:5], 0x10
182; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
183; GFX9-NEXT:    v_mov_b32_e32 v2, s2
184; GFX9-NEXT:    v_mov_b32_e32 v0, s0
185; GFX9-NEXT:    s_and_b32 s3, s3, 1
186; GFX9-NEXT:    v_mov_b32_e32 v3, s3
187; GFX9-NEXT:    v_mov_b32_e32 v1, s1
188; GFX9-NEXT:    ds_write_b8 v2, v3 offset:8
189; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
190; GFX9-NEXT:    s_endpgm
191  store i65 %arg, i65 addrspace(3)* %ptr, align 8
192  ret void
193}
194
; Non-kernel function storing a 13-bit integer through an addrspace(3)
; pointer. The expected code masks the value with 0x1fff (the low 13 bits)
; and then issues a single 16-bit write at offset 0 on every target.
; Only the hawaii/fiji sequence sets m0 to -1 before the write.
195define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 {
196; CIVI-LABEL: local_store_i13:
197; CIVI:       ; %bb.0:
198; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; CIVI-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
200; CIVI-NEXT:    s_mov_b32 m0, -1
201; CIVI-NEXT:    ds_write_b16 v0, v1
202; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
203; CIVI-NEXT:    s_setpc_b64 s[30:31]
204;
205; GFX9-LABEL: local_store_i13:
206; GFX9:       ; %bb.0:
207; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208; GFX9-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
209; GFX9-NEXT:    ds_write_b16 v0, v1
210; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
211; GFX9-NEXT:    s_setpc_b64 s[30:31]
212  store i13 %arg, i13 addrspace(3)* %ptr, align 8
213  ret void
214}
215
; Non-kernel function storing a 17-bit integer through an addrspace(3)
; pointer. The expected code writes the low 16 bits with a 16-bit write at
; offset 0 and the remaining bit with an 8-bit write at offset 2.
; hawaii/fiji extract bit 16 with v_bfe_u32 ..., 16, 1 before ds_write_b8;
; gfx900 masks the value with 0x1ffff and uses ds_write_b8_d16_hi instead.
216define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 {
217; CIVI-LABEL: local_store_i17:
218; CIVI:       ; %bb.0:
219; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220; CIVI-NEXT:    s_mov_b32 m0, -1
221; CIVI-NEXT:    ds_write_b16 v0, v1
222; CIVI-NEXT:    v_bfe_u32 v1, v1, 16, 1
223; CIVI-NEXT:    ds_write_b8 v0, v1 offset:2
224; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
225; CIVI-NEXT:    s_setpc_b64 s[30:31]
226;
227; GFX9-LABEL: local_store_i17:
228; GFX9:       ; %bb.0:
229; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230; GFX9-NEXT:    ds_write_b16 v0, v1
231; GFX9-NEXT:    v_and_b32_e32 v1, 0x1ffff, v1
232; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
233; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
234; GFX9-NEXT:    s_setpc_b64 s[30:31]
235  store i17 %arg, i17 addrspace(3)* %ptr, align 8
236  ret void
237}
238
; Attribute group referenced as #0 by each function definition visible in
; this file; it contains only nounwind.
239attributes #0 = { nounwind }
240