; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with no explicit alignment on the store instruction.
; The GFX9 and GFX7 checks expect one ds_write_b96; the GFX6 checks expect
; the value to be split into a ds_write_b32 (offset 8) and a ds_write_b64.
define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out
  ret void
}

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with "align 1" on the store.
; All three targets are expected to emit twelve single-byte stores.
; The GFX9 checks use ds_write_b8 / ds_write_b8_d16_hi for bytes 0 and 2 of
; each dword and s_lshr_b32 by 8 and 24 for bytes 1 and 3. The GFX7 and GFX6
; checks use plain ds_write_b8 only, with s_lshr_b32 by 8, 16 and 24.
define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:10
; GFX9-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    s_lshr_b32 s3, s2, 8
; GFX9-NEXT:    ds_write_b8 v0, v1
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    s_lshr_b32 s2, s2, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s2, s1, 8
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s1, s1, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    s_lshr_b32 s1, s0, 8
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    s_lshr_b32 s0, s0, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX7-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_lshr_b32 s3, s2, 8
; GFX7-NEXT:    ds_write_b8 v0, v1
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    s_lshr_b32 s3, s2, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    s_lshr_b32 s2, s2, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s2, s1, 8
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:10
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s2, s1, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s1, s1, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s1, s0, 8
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:6
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s0, s0, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:2
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v2, s1
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX6-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_lshr_b32 s3, s2, 8
; GFX6-NEXT:    ds_write_b8 v0, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, s3
; GFX6-NEXT:    s_lshr_b32 s3, s2, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX6-NEXT:    v_mov_b32_e32 v1, s3
; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s2, s1, 8
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:10
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s2, s1, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s1, s0, 8
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:6
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s1, s0, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:2
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
  ret void
}

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with "align 2" on the store.
; All three targets are expected to emit six 16-bit stores.
; The GFX9 checks pair ds_write_b16 with ds_write_b16_d16_hi for each dword;
; the GFX7 and GFX6 checks use ds_write_b16 only and obtain the high halves
; with s_lshr_b32 by 16.
define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v1 offset:10
; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v2 offset:6
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v1 offset:2
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX7-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_lshr_b32 s2, s2, 16
; GFX7-NEXT:    ds_write_b16 v0, v1
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s1, s1, 16
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:10
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s0, s0, 16
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:6
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:2
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v2, s1
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX6-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
; GFX6-NEXT:    ds_write_b16 v0, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:10
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:6
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:2
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
  ret void
}

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with "align 4" on the store.
; All three targets are expected to write the first two dwords with one
; ds_write2_b32 (offset1:1) and the third dword with a ds_write_b32 at
; offset 8; only register assignment and scheduling differ between them.
define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    ds_write_b32 v0, v3 offset:8
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    ds_write_b32 v0, v1 offset:8
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_write2_b32 v0, v2, v1 offset1:1
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v0, v1 offset:8
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4
  ret void
}

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with "align 8" on the store.
; All three targets are expected to write the third dword with a
; ds_write_b32 at offset 8 and the first two dwords with one ds_write_b64.
define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s4
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
  ret void
}

; Stores a <3 x i32> kernel argument through the addrspace(3) pointer %out
; with "align 16" on the store.
; The GFX9 and GFX7 checks expect one ds_write_b96; the GFX6 checks expect
; the value to be split into a ds_write_b32 (offset 8) and a ds_write_b64.
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16
  ret void
}
